def main():
    """Parse CLI options, assemble the encoder+classifier model, then train/validate.

    NOTE: single-GPU only (per the banner below); assumes CUDA is available.
    """
    opt = parse_opts()
    # Model name is expected as '<encoder>-<classifier>' (e.g. 'resnet-lstm').
    ecd_name, cls_name = opt.model_name.split('-')
    ecd_model = get_encoder_net(ecd_name)
    cls_model = get_end_net(cls_name)
    cfg.encoder_model = ecd_name
    cfg.classification_model = cls_name
    if opt.debug:
        cfg.debug = opt.debug
    else:
        # 'TEST' means: name the tensorboard run after the model itself.
        if opt.tensorboard == 'TEST':
            cfg.tensorboard = opt.model_name
        else:
            cfg.tensorboard = opt.tensorboard
        cfg.flag = opt.flag
    model = cls_model(cfg, encoder=CNNencoder(
        cfg, ecd_model(pretrained=True, path=opt.encoder_model)))
    cfg.video_path = os.path.join(cfg.root_path, cfg.video_path)
    cfg.annotation_path = os.path.join(cfg.root_path, cfg.annotation_path)
    cfg.list_all_member()
    torch.manual_seed(cfg.manual_seed)
    print('##########################################')
    print('####### model 仅支持单GPU')
    print('##########################################')
    model = model.cuda()
    print(model)
    criterion = nn.CrossEntropyLoss()
    if cfg.cuda:
        criterion = criterion.cuda()
    norm_method = Normalize([0, 0, 0], [1, 1, 1])

    print('##########################################')
    print('####### train')
    print('##########################################')
    assert cfg.train_crop in ['random', 'corner', 'center']
    if cfg.train_crop == 'random':
        # BUG FIX: the original assigned the bare tuple (cfg.scales,
        # cfg.sample_size) here, which is not a transform and would break
        # Compose below; use the random multi-scale crop like the other modes.
        crop_method = MultiScaleRandomCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(cfg.scales, cfg.sample_size,
                                           crop_positions=['c'])
    spatial_transform = Compose([
        crop_method,
        RandomHorizontalFlip(),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = TemporalRandomCrop(cfg.sample_duration)
    target_transform = ClassLabel()
    training_data = get_training_set(cfg, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=cfg.batch_size,
                                               shuffle=True,
                                               num_workers=cfg.n_threads,
                                               drop_last=False,
                                               pin_memory=True)
    optimizer = model.get_optimizer(lr1=cfg.lr, lr2=cfg.lr2)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                               patience=cfg.lr_patience)

    print('##########################################')
    print('####### val')
    print('##########################################')
    spatial_transform = Compose([
        Scale(cfg.sample_size),
        CenterCrop(cfg.sample_size),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(cfg.sample_duration)
    target_transform = ClassLabel()
    validation_data = get_validation_set(cfg, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=cfg.batch_size,
                                             shuffle=False,
                                             num_workers=cfg.n_threads,
                                             drop_last=False,
                                             pin_memory=True)

    print('##########################################')
    print('####### run')
    print('##########################################')
    # In debug mode nothing is logged to disk.
    if cfg.debug:
        logger = None
    else:
        path = get_log_dir(cfg.logdir, name=cfg.tensorboard, flag=cfg.flag)
        logger = Logger(logdir=path)
        cfg.save_config(path)
    # ReduceLROnPlateau is driven by the per-epoch validation loss.
    for i in range(cfg.begin_epoch, cfg.n_epochs + 1):
        train_epoch(i, train_loader, model, criterion, optimizer, cfg, logger)
        validation_loss = val_epoch(i, val_loader, model, criterion, cfg, logger)
        scheduler.step(validation_loss)
def main_run(dataset, root_dir, checkpoint_path, seqLen, testBatchSize, memSize,
             outPool_size, split):
    """Evaluate a trained attention model on the test split of an egocentric dataset.

    Args:
        dataset: 'gtea_61', 'gtea_71' or 'egtea_gaze+'.
        root_dir: root directory containing the dataset folder.
        checkpoint_path: checkpoint file holding 'model_state_dict'.
        seqLen: frames sampled per clip.
        testBatchSize: evaluation batch size.
        memSize: ConvLSTM memory size.
        outPool_size: number of CAM classes for attention pooling.
        split: test split identifier passed to the split generators.
    """
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    test_split = split
    c_cam_classes = outPool_size
    # (The original re-assigned several parameters to themselves; removed.)
    if dataset == 'gtea_61':
        num_classes = 61
    elif dataset == 'gtea_71':
        num_classes = 71
    elif dataset == 'egtea_gaze+':
        num_classes = 106
    else:
        print('Wrong dataset')
        sys.exit()

    dataset_dir = os.path.join(root_dir, dataset)
    print('Preparing dataset...')
    if dataset == 'egtea_gaze+':
        trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, testNumFrames = \
            gen_split_egtea_gazePlus(dataset_dir, test_split)
    else:
        trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, testNumFrames, _ = \
            gen_split(dataset_dir, test_split)

    vid_seq_test = makeDataset(testDatasetF, testLabels, testNumFrames,
                               spatial_transform=Compose([Scale(256), CenterCrop(224),
                                                          ToTensor(), normalize]),
                               fmt='.jpg', seqLen=seqLen)
    print('Number of test samples = {}'.format(len(vid_seq_test)))
    # Fetch item 0 once (the original called __getitem__(0) twice, decoding
    # the clip's frames twice just to print its shape).
    first_item = vid_seq_test[0]
    print("Dataset shape: ", len(first_item), first_item[0].shape, end='\n\n\n')
    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False, num_workers=0,
                                              pin_memory=True)

    model = attentionModel(num_classes=num_classes, mem_size=memSize,
                           c_cam_classes=c_cam_classes)
    if os.path.exists(checkpoint_path):
        print('Loading weights from checkpoint file {}'.format(checkpoint_path))
    else:
        print('Checkpoint file {} does not exist'.format(checkpoint_path))
        sys.exit()
    last_checkpoint = torch.load(checkpoint_path)  # add map_location=torch.device('cpu') for CPU-only use
    model.load_state_dict(last_checkpoint['model_state_dict'])
    model.cuda()
    model.eval()  # eval() already implies train(False); one call suffices

    print('Testing...')
    test_samples = 0
    numCorr = 0
    with torch.no_grad():
        for inputs, targets in tqdm(test_loader):
            test_samples += inputs.size(0)
            # Dataset yields (batch, seq, C, H, W); the model expects seq-first.
            inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
            output_label, _ = model(inputVariable)
            _, predicted = torch.max(output_label.data, 1)
            # Accumulate as a plain int: the original's numCorr.cpu().item()
            # crashed on an empty loader because numCorr stayed an int.
            numCorr += (predicted == targets.cuda()).sum().item()
    # Guard against division by zero when the test set is empty.
    test_accuracy = (numCorr / test_samples) * 100 if test_samples else 0.0
    print('Test Accuracy after = {}%'.format(test_accuracy))
norm_method = Normalize(opt.mean, opt.std) if not opt.no_train: assert opt.train_crop in ['random', 'corner', 'center'] if opt.train_crop == 'random': crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'corner': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'center': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size, crop_positions=['c']) spatial_transform = Compose([ crop_method, RandomHorizontalFlip(), ToTensor(opt.norm_value), norm_method ]) temporal_transform = TemporalRandomCrop(opt.sample_duration) target_transform = ClassLabel() print("Getting the training set") training_data = get_training_set(opt, spatial_transform, temporal_transform, target_transform) train_loader = torch.utils.data.DataLoader(training_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.n_threads, pin_memory=True) train_logger = Logger(os.path.join(opt.result_path, 'train.log'), ['epoch', 'loss', 'acc', 'lr']) train_batch_logger = Logger( os.path.join(opt.result_path, 'train_batch.log'),
def get_val_utils(opt):
    """Build the validation DataLoader (and, on the master node, its Logger).

    Returns:
        (val_loader, val_logger) — val_logger is None on non-master nodes.
    """
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    spatial_transform = [
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor()
    ]
    if opt.input_type == 'flow':
        # Optical-flow input carries its signal in the first two channels only.
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    # Cache the (expensive-to-build) dataset and collate_fn on disk so that
    # repeated runs can skip re-indexing the videos.
    val_data_checkpoint_path = opt.result_path / Path('val_data_' +
                                                      opt.dataset + '.data')
    val_collate_checkpoint_path = opt.result_path / Path('val_coll_' +
                                                         opt.dataset + '.data')
    if os.path.exists(val_data_checkpoint_path) and os.path.exists(
            val_collate_checkpoint_path) and opt.save_load_data_checkpoint:
        # NOTE(review): pickle.load assumes these cache files are trusted
        # local artifacts produced by a previous run of this very code.
        with open(val_data_checkpoint_path, 'rb') as filehandle:
            val_data = pickle.load(filehandle)
        with open(val_collate_checkpoint_path, 'rb') as filehandle:
            collate_fn = pickle.load(filehandle)
    else:
        val_data, collate_fn = get_validation_data(
            opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
            opt.file_type, spatial_transform, temporal_transform)
        if opt.save_load_data_checkpoint:
            with open(val_data_checkpoint_path, 'wb') as filehandle:
                pickle.dump(val_data, filehandle)
            with open(val_collate_checkpoint_path, 'wb') as filehandle:
                pickle.dump(collate_fn, filehandle)

    if opt.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_data, shuffle=False)
    else:
        val_sampler = None
    # Each clip is evaluated with n_val_samples temporal crops, so shrink the
    # batch accordingly.  BUG FIX: max(1, ...) guards against a zero batch
    # size when batch_size < n_val_samples (DataLoader rejects batch_size=0).
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=max(1, opt.batch_size // opt.n_val_samples),
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=val_sampler,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    if opt.is_master_node:
        val_logger = Logger(opt.result_path / 'val.log',
                            ['epoch', 'loss', 'acc'])
    else:
        val_logger = None

    return val_loader, val_logger
# else: # optimizer = optim.Adam( # parameters, # lr=opt.learning_rate) # elif opt.optimizer == 'rmsprop': # optimizer = optim.RMSprop( # parameters, # lr=opt.learning_rate) # scheduler = lr_scheduler.ReduceLROnPlateau( # optimizer, 'min', patience=opt.lr_patience) if not opt.no_val: if opt.dataset in ['gtea', 'kth2']: spatial_transform = Compose([ Scale(opt.sample_size), CenterCrop(opt.sample_size), ToTensor(opt.norm_value), norm_method, ]) else: spatial_transform = Compose([ Scale(opt.sample_size), CenterCrop(opt.sample_size), RGB2Gray(), ToTensor(opt.norm_value), norm_method, ]) temporal_transform = LoopPadding(opt.sample_duration) target_transform = ClassLabel() if opt.compress == 'mask': spatio_temporal_transform = Coded(opt.mask_path) elif opt.compress == 'avg':
def main_run(dataset, flowModel, rgbModel, stackSize, seqLen, memSize,
             trainDatasetDir, valDatasetDir, outDir, trainBatchSize,
             valBatchSize, lr1, numEpochs, decay_step, decay_factor):
    """Fine-tune the two-stream (flow + RGB) attention model.

    Only the classifier, the frame model's LSTM cell, selected layer4 convs /
    fc of the frame model and the flow model's layer4 are trained; everything
    else stays frozen.  Logs to TensorBoard and text files; saves the best
    model by validation accuracy (or a periodic checkpoint without val data).
    """
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    # Dir for saving models and log files; refuse to clobber an existing run.
    model_folder = os.path.join('./', outDir, dataset, 'twoStream')
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)

    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(),
                                 MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])

    vid_seq_train = makeDataset(trainDatasetDir,
                                spatial_transform=spatial_transform,
                                sequence=False, numSeg=1, stackSize=stackSize,
                                fmt='.png', seqLen=seqLen)
    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True, num_workers=4,
                                               pin_memory=True)
    if valDatasetDir is not None:
        vid_seq_val = makeDataset(valDatasetDir,
                                  spatial_transform=Compose([Scale(256),
                                                             CenterCrop(224),
                                                             ToTensor(), normalize]),
                                  sequence=False, numSeg=1, stackSize=stackSize,
                                  fmt='.png', phase='Test', seqLen=seqLen)
        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False, num_workers=2,
                                                 pin_memory=True)
        valSamples = vid_seq_val.__len__()

    model = twoStreamAttentionModel(flowModel=flowModel, frameModel=rgbModel,
                                    stackSize=stackSize, memSize=memSize,
                                    num_classes=num_classes)
    # Freeze everything, then selectively re-enable the parts we fine-tune.
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)

    train_params = []
    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.frameModel.lstm_cell.parameters():
        train_params += [params]
        params.requires_grad = True
    for params in model.frameModel.resNet.layer4[0].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.frameModel.resNet.layer4[0].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.frameModel.resNet.layer4[1].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.frameModel.resNet.layer4[1].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.frameModel.resNet.layer4[2].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.frameModel.resNet.layer4[2].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.frameModel.resNet.fc.parameters():
        params.requires_grad = True
        train_params += [params]

    # The flow stream's layer4 trains with its own (smaller) learning rate.
    base_params = []
    for params in model.flowModel.layer4.parameters():
        base_params += [params]
        params.requires_grad = True

    model.cuda()

    trainSamples = vid_seq_train.__len__()
    min_accuracy = 0  # best validation accuracy seen so far

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.SGD([
        {'params': train_params},
        {'params': base_params, 'lr': 1e-4},
    ], lr=lr1, momentum=0.9, weight_decay=5e-4)
    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn,
                                                      step_size=decay_step,
                                                      gamma=decay_factor)
    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        # Re-enable train mode on the fine-tuned sub-modules (validation
        # below switches the whole model to eval).
        model.classifier.train(True)
        model.flowModel.layer4.train(True)
        for j, (inputFlow, inputFrame, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariableFlow = Variable(inputFlow.cuda())
            inputVariableFrame = Variable(inputFrame.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            output_label = model(inputVariableFlow, inputVariableFrame)
            loss = loss_fn(F.log_softmax(output_label, dim=1), labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            epoch_loss += loss.item()
        # BUG FIX: step the LR scheduler AFTER the epoch's optimizer updates.
        # The original called it at the top of the epoch loop, which decays
        # the LR one epoch early and triggers PyTorch's step-ordering warning.
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(epoch + 1, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch + 1, trainAccuracy))
        if valDatasetDir is not None:
            if (epoch + 1) % 1 == 0:  # validate every epoch
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                numCorr = 0
                # no_grad: validation needs no autograd graph (saves memory).
                with torch.no_grad():
                    for j, (inputFlow, inputFrame, targets) in enumerate(val_loader):
                        val_iter += 1
                        inputVariableFlow = Variable(inputFlow.cuda())
                        inputVariableFrame = Variable(inputFrame.permute(1, 0, 2, 3, 4).cuda())
                        labelVariable = Variable(targets.cuda())
                        output_label = model(inputVariableFlow, inputVariableFrame)
                        loss = loss_fn(F.log_softmax(output_label, dim=1), labelVariable)
                        val_loss_epoch += loss.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += (predicted == labelVariable.data).sum()
                val_accuracy = torch.true_divide(numCorr, valSamples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('Val Loss after {} epochs, loss = {}'.format(epoch + 1, avg_val_loss))
                print('Val Accuracy after {} epochs = {}%'.format(epoch + 1, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                # Keep only the best-so-far model (by validation accuracy).
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder + '/model_twoStream_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
        else:
            # Without a validation set, checkpoint every 10 epochs instead.
            if (epoch + 1) % 10 == 0:
                save_path_model = (model_folder + '/model_twoStream_state_dict_epoch' +
                                   str(epoch + 1) + '.pth')
                torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen, memSize):
    """Evaluate a trained two-stream model and plot its confusion matrix.

    Args:
        dataset: one of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'.
        model_state_dict: path to the saved state dict.
        dataset_dir: root directory of the test split.
        stackSize: number of stacked flow frames.
        seqLen: frames sampled per clip.
        memSize: ConvLSTM memory size.
    """
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        # BUG FIX: an unknown dataset previously left num_classes undefined,
        # crashing with NameError further down; fail fast instead.
        print('Dataset not found')
        sys.exit()

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    testBatchSize = 1
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=False, numSeg=1, stackSize=stackSize,
                               fmt='.png', phase='Test', seqLen=seqLen)
    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False, num_workers=2,
                                              pin_memory=True)

    # BUG FIX: honour the stackSize/memSize arguments instead of the
    # hard-coded 5/512 the original passed (the parameters were ignored).
    model = twoStreamAttentionModel(stackSize=stackSize, memSize=memSize,
                                    num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)
    model.cuda()

    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0
    predicted_labels = []
    true_labels = []
    with torch.no_grad():
        for j, (inputFlow, inputFrame, targets) in enumerate(test_loader):
            # RGB stream expects sequence-first layout.
            inputVariableFrame = Variable(
                inputFrame.permute(1, 0, 2, 3, 4).cuda())
            inputVariableFlow = Variable(inputFlow.cuda())
            output_label = model(inputVariableFlow, inputVariableFrame)
            _, predictedTwoStream = torch.max(output_label.data, 1)
            numCorrTwoStream += (predictedTwoStream == targets.cuda()).sum()
            predicted_labels.append(predictedTwoStream.item())
            true_labels.append(targets.item())
    test_accuracyTwoStream = torch.true_divide(numCorrTwoStream, test_samples) * 100

    # Row-normalised confusion matrix (rows = true classes).
    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]
    print('Accuracy {:.02f}%'.format(test_accuracyTwoStream))
    # BUG FIX: tick marks were hard-coded for 61 classes regardless of the
    # dataset; derive them from num_classes instead.
    ticks = np.linspace(0, num_classes - 1, num=num_classes)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-twoStreamJoint.jpg', bbox_inches='tight')
    plt.show()
last_fc=True) # In[10]: model.load_state_dict(out_state_dict) # In[11]: model.eval() # In[169]: spatial_transform = Compose([ Scale(sample_size), CenterCrop(sample_size), ToTensor(), Normalize(mean, [1, 1, 1]) ]) temporal_transform = LoopPadding(sample_duration) # In[ ]: test_video = os.path.join('test_videos', args['video']) # In[170]: subprocess.call('mkdir tmp', shell=True) subprocess.call('ffmpeg -i {} tmp/image_%05d.jpg'.format(test_video), shell=True) # In[173]:
def get_ucf_data(opt):
    """Build UCF101 datasets and wrap them in DataLoaders.

    All three splits share one spatial/temporal transform pipeline.  Train
    and validation use class-label targets; test uses video-id targets.

    Returns:
        (train_loader, val_loader, test_loader, test_data)
    """
    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1, 1, 1])
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    cls_target = ClassLabel()  # VideoID()

    def _split(subset, target_transform):
        # One UCF101 dataset for the given subset, sharing the transforms.
        return UCF101(opt.video_path,
                      opt.annotation_path,
                      subset,
                      0,
                      spatial_transform=spatial_transform,
                      temporal_transform=temporal_transform,
                      target_transform=target_transform,
                      sample_duration=16)

    def _wrap(ds):
        # Deterministic (unshuffled) loader; pinning disabled.
        return torch.utils.data.DataLoader(ds,
                                           batch_size=opt.batch_size,
                                           shuffle=False,
                                           num_workers=opt.n_threads,
                                           pin_memory=False)  # True

    training_data = _split('training', cls_target)
    val_data = _split('validation', cls_target)
    test_data = _split('testing', VideoID())

    return _wrap(training_data), _wrap(val_data), _wrap(test_data), test_data
def train_main_multi_batch(model, input_root_dir, opt):
    """Train `model` on folders of .mp4 videos labelled via metadata.json.

    Walks every sub-directory of `input_root_dir`, reads its metadata.json
    (filename -> {'label': 'FAKE'|'REAL'}), extracts frames for PAIRS of
    videos into ./tmp with ffmpeg, and trains with an MSE loss over one-hot
    FAKE/REAL targets.  Checkpoints periodically to a hard-coded path.

    NOTE(review): mkdir/ffmpeg/rm are invoked via shell=True with
    interpolated file paths — unsafe if directory/file names are untrusted.
    """
    ####
    # NOTE(review): both getLogger('info') calls return the SAME logger, so
    # two JSON stream handlers end up on one logger and records print twice.
    epoch_logger = logging.getLogger('info')
    batch_logger = logging.getLogger('info')
    elogHandler = logging.StreamHandler()
    eformatter = jsonlogger.JsonFormatter()
    elogHandler.setFormatter(eformatter)
    epoch_logger.addHandler(elogHandler)
    blogHandler = logging.StreamHandler()
    bformatter = jsonlogger.JsonFormatter()
    blogHandler.setFormatter(bformatter)
    batch_logger.addHandler(blogHandler)

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)

    # criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    epoch = 1  # single pass over the data; 'epoch' is only used in log records
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()
    end_time = time.time()
    ii = 0  # global processed-batch counter; drives the periodic checkpoint
    previous_label = "FAKE"
    pre_previous_label = "FAKE"
    for files_dir in os.listdir(input_root_dir):
        sub_path = os.path.join(input_root_dir, files_dir)
        print("Files dir: " + files_dir)
        print("Sub path:" + sub_path)
        data_file_path = os.path.join(sub_path, 'metadata.json')
        with open(data_file_path, 'r') as data_file:
            labels = json.load(data_file)
        # NOTE(review): mutates the shared opt object in place.
        opt.batch_size = 36
        total_batch_size = len(os.listdir(sub_path))
        i = 0
        input_files = os.listdir(sub_path)
        # Consume the directory listing two files at a time (a video pair).
        for inp_num in range(1, len(input_files), 2):
            print("Lala: " + str(inp_num))
            # print(input_files)
            input_file1 = input_files[inp_num]
            input_file2 = input_files[inp_num - 1]
            if input_file1.endswith(".mp4") and input_file2.endswith(".mp4"):
                video_path1 = os.path.join(sub_path, input_file1)
                video_path2 = os.path.join(sub_path, input_file2)
                label1 = labels[input_file1]
                label2 = labels[input_file2]
                # Skip a pair when video1's label matches both of the last two
                # processed labels — a crude class-balancing heuristic.
                if label1['label'] != previous_label or label1[
                        'label'] != pre_previous_label:
                    previous_label = label1['label']
                    subprocess.call('mkdir tmp', shell=True)
                    # Dump up to 288 frames of each video into ./tmp; the
                    # second video's frames start at 289 so the two interleave
                    # into one contiguous image sequence.
                    subprocess.call(
                        'ffmpeg -hide_banner -loglevel panic -i {} -vframes 288 tmp/image_%05d.jpg'
                        .format(video_path1),
                        shell=True)
                    subprocess.call(
                        'ffmpeg -hide_banner -loglevel panic -i {} -vframes 288 -start_number 289 tmp/image_%05d.jpg'
                        .format(video_path2),
                        shell=True)
                    # Hard-coded workspace path — the tmp dir must live here.
                    video_dir = '{}tmp/'.format(
                        '/data/codebases/video_classification/')
                    data = Video(video_dir,
                                 spatial_transform=spatial_transform,
                                 temporal_transform=temporal_transform,
                                 sample_duration=opt.sample_duration)
                    data_loader = torch.utils.data.DataLoader(
                        data,
                        batch_size=opt.batch_size,
                        shuffle=False,
                        num_workers=opt.n_threads,
                        pin_memory=True)
                    for k, (inputs, targets) in enumerate(data_loader):
                        data_time.update(time.time() - end_time)
                        print("Label: " + label1['label'] + ", " + label2['label'])
                        # # FOR CROSS ENTROPY LOSS
                        # targets = torch.zeros([18, 1], dtype=torch.long)
                        # for j in range(0,18):
                        #     if(label['label'] == 'FAKE'):
                        #         targets[j][0] = 0
                        #         # targets[j][1] = 1
                        #     else:
                        #         targets[j][0] = 1
                        #         # targets[j][1] = 0
                        # FOR MSE LOSS
                        # Overwrite the loader's targets with one-hot labels:
                        # first half of the batch belongs to video1, second
                        # half to video2 (assumes the 288/289 frame split
                        # lines up with the batch — TODO confirm).
                        targets = torch.zeros([opt.batch_size, opt.n_classes],
                                              dtype=torch.float)
                        for j in range(0, int(opt.batch_size / 2)):
                            if (label1['label'] == 'FAKE'):
                                targets[j][0] = 0.0
                                targets[j][1] = 1.0
                            else:
                                targets[j][0] = 1.0
                                targets[j][1] = 0.0
                        for j in range(int(opt.batch_size / 2), opt.batch_size):
                            if (label2['label'] == 'FAKE'):
                                targets[j][0] = 0.0
                                targets[j][1] = 1.0
                            else:
                                targets[j][0] = 1.0
                                targets[j][1] = 0.0
                        if not opt.no_cuda:
                            targets = targets.cuda(non_blocking=True)
                        inputs = Variable(inputs)
                        targets = Variable(targets)
                        outputs = model(inputs)
                        print(outputs.t())
                        print(targets.t())
                        # FOR CROSS ENTROPY LOSS
                        # loss = criterion(outputs, torch.max(targets, 1)[1])
                        # FOR MSE LOSS
                        loss = criterion(outputs, targets)
                        print(loss)
                        # FOR CROSS ENTROPY LOSS
                        # acc = calculate_accuracy(outputs, targets)
                        # FOR MSE LOSS
                        acc = calculate_accuracy_mse(outputs, targets)
                        print(acc)
                        # loss.data[0] only works on very old PyTorch; newer
                        # versions raise, hence this (over-broad) fallback.
                        try:
                            losses.update(loss.data[0], inputs.size(0))
                        except:
                            losses.update(loss.data, inputs.size(0))
                        accuracies.update(acc, inputs.size(0))
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        batch_time.update(time.time() - end_time)
                        end_time = time.time()
                        # NOTE(review): log level 1 is below DEBUG; these
                        # records only appear if the logger level is that low.
                        batch_logger.log(
                            1, {
                                'epoch': epoch,
                                'batch': i + 1,
                                'iter': (epoch - 1) * opt.batch_size + (i + 1),
                                'loss': losses.val,
                                'acc': accuracies.val,
                                'lr': optimizer.param_groups[0]['lr']
                            })
                        print(
                            'Epoch: [{0}][{1}/{2}]\t'
                            'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                            'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                            'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                            'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                                epoch, i + 1, opt.batch_size,
                                batch_time=batch_time, data_time=data_time,
                                loss=losses, acc=accuracies))
                        ii += 1
                    # Clean up the extracted frames before the next pair.
                    subprocess.call('rm -rf tmp', shell=True)
                i += 1
                if ii % 100 == 0:
                    # Periodic checkpoint to a hard-coded workspace path.
                    save_loc = '/data/codebases/video_classification/model{}.pth'.format(
                        ii)
                    torch.save(model.state_dict(), save_loc)
    epoch_logger.log(
        1, {
            'epoch': epoch,
            'loss': losses.avg,
            'acc': accuracies.avg,
            'lr': optimizer.param_groups[0]['lr']
        })
    print('XXX Epoch: [{0}]\t'
          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
          'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(epoch, i + 1,
                                                     opt.batch_size,
                                                     batch_time=batch_time,
                                                     data_time=data_time,
                                                     loss=losses,
                                                     acc=accuracies))
    exit(1)
if __name__ == '__main__': opt = parse_opts() opt.mean = get_mean(1) opt.arch = '{}-{}'.format(opt.model_name, opt.model_depth) opt.sample_duration = 16 opt.scales = [opt.initial_scale] for i in range(1, opt.n_scales): opt.scales.append(opt.scales[-1] * opt.scale_step) print('#####', opt.scales) print(opt.mean) spatial_transform = Compose([ MultiScaleCornerCrop(opt.scales, opt.sample_size), RandomHorizontalFlip(), ToTensor(1), Normalize(opt.mean, [1, 1, 1]) ]) temporal_transform = TemporalRandomCrop(opt.sample_duration) train_data = Video(opt.train_list, spatial_transform=spatial_transform, temporal_transform=temporal_transform, sample_duration=opt.sample_duration, n_samples_for_each_video=1) train_loader = torch.utils.data.DataLoader(train_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.n_threads, pin_memory=True) val_spatial_transform = Compose([
def main_run(stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen,
             trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor,
             decay_step, memSize, color, rgbm, fcm):
    """Two-stage training of the bigConvLSTM two-branch (RGB + colorization) model.

    Stage 1 trains only the LSTM cell and classifier on a frozen backbone;
    stage 2 resumes from the stage-1 weights in `stage1_dict` and additionally
    fine-tunes layer4 + fc of both ResNet branches.  Logs to TensorBoard and
    text files in a fresh `model_folder`; keeps the best model by val accuracy.
    """
    #dataset = 'gtea61'
    begin_time = datetime.datetime.now()
    num_classes = 61  # GTEA-61 is assumed throughout
    if color not in ['HSV_opticalFlow', 'flow_surfaceNormals', 'warpedHSV']:
        print(color, ' is not valid')
        exit(-1)
    model_folder = os.path.join(
        './', out_dir, 'BigConvLSTM', color, str(seqLen),
        'stage' + str(stage))  # Dir for saving models and log files
    # Create the dir (refuse to overwrite a previous run)
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])
    # Fixed GTEA user split: S1/S3/S4 for training, S2 for validation.
    vid_seq_train = makeDataset(train_data_dir,
                                seqLen=seqLen,
                                fmt='.png',
                                users=['S1', 'S3', 'S4'],
                                spatial_transform=spatial_transform,
                                colorization=color)
    #trainInstances = vid_seq_train.__len__()
    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)
    if val_data_dir is not None:
        vid_seq_val = makeDataset(val_data_dir,
                                  seqLen=seqLen,
                                  fmt='.png',
                                  users=['S2'],
                                  train=False,
                                  spatial_transform=Compose([
                                      Scale(256),
                                      CenterCrop(224),
                                      ToTensor(), normalize
                                  ]),
                                  colorization=color)
        #valInstances = vid_seq_val.__len__()
        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    train_params = []
    if stage == 1:
        # Stage 1: whole network frozen; only lstm_cell/classifier (enabled
        # below, after this if/else) are trained.
        model = bigConvLSTM(num_classes=num_classes, mem_size=memSize,
                            rgbm=rgbm, fcm=fcm)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:  # stage == 2
        # Stage 2: resume from stage-1 weights, then unfreeze layer4 convs
        # and fc of both ResNet branches (RGB and colorization).
        model = bigConvLSTM(num_classes=num_classes, mem_size=memSize,
                            rgbm=rgbm, fcm=fcm)
        model.load_state_dict(torch.load(stage1_dict))
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNetRGB.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNetRGB.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNetRGB.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNetRGB.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNetRGB.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNetRGB.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNetRGB.fc.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNetCol.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNetCol.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNetCol.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNetCol.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNetCol.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNetCol.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNetCol.fc.parameters():
            params.requires_grad = True
            train_params += [params]
        # Switch the unfrozen sub-modules to train mode.
        model.resNetRGB.layer4[0].conv1.train(True)
        model.resNetRGB.layer4[0].conv2.train(True)
        model.resNetRGB.layer4[1].conv1.train(True)
        model.resNetRGB.layer4[1].conv2.train(True)
        model.resNetRGB.layer4[2].conv1.train(True)
        model.resNetRGB.layer4[2].conv2.train(True)
        model.resNetRGB.fc.train(True)
        model.resNetCol.layer4[0].conv1.train(True)
        model.resNetCol.layer4[0].conv2.train(True)
        model.resNetCol.layer4[1].conv1.train(True)
        model.resNetCol.layer4[1].conv2.train(True)
        model.resNetCol.layer4[2].conv1.train(True)
        model.resNetCol.layer4[2].conv2.train(True)
        model.resNetCol.fc.train(True)

    # LSTM cell and classifier are trained in BOTH stages.
    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]
    model.lstm_cell.train(True)
    model.classifier.train(True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    print(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5,
                                    eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0  # best validation accuracy seen so far
    dataload_time = datetime.datetime.now()
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        # Re-enable train mode on the trainable parts (validation below
        # switches the whole model to eval).
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        if stage == 2:
            model.resNetRGB.layer4[0].conv1.train(True)
            model.resNetRGB.layer4[0].conv2.train(True)
            model.resNetRGB.layer4[1].conv1.train(True)
            model.resNetRGB.layer4[1].conv2.train(True)
            model.resNetRGB.layer4[2].conv1.train(True)
            model.resNetRGB.layer4[2].conv2.train(True)
            model.resNetRGB.fc.train(True)
            model.resNetCol.layer4[0].conv1.train(True)
            model.resNetCol.layer4[0].conv2.train(True)
            model.resNetCol.layer4[1].conv1.train(True)
            model.resNetCol.layer4[1].conv2.train(True)
            model.resNetCol.layer4[2].conv1.train(True)
            model.resNetCol.layer4[2].conv2.train(True)
            model.resNetCol.fc.train(True)
        #for i, (inputs, targets) in enumerate(train_loader):
        for inputsRGB, inputsCol, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # Both branch inputs go sequence-first for the ConvLSTM.
            inputVariableRGB = Variable(
                inputsRGB.permute(1, 0, 2, 3, 4).to(device))
            inputVariableCol = Variable(
                inputsCol.permute(1, 0, 2, 3, 4).to(device))
            labelVariable = Variable(targets.to(device))
            trainSamples += inputsRGB.size(0)
            output_label, _ = model(inputVariableRGB, inputVariableCol, device)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(device)).sum()
            epoch_loss += loss.item()
        # Scheduler stepped once per epoch, after the optimizer updates
        # (placement reconstructed from mangled source — TODO confirm).
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))
        if val_data_dir is not None:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            with torch.no_grad():
                #for j, (inputs, targets) in enumerate(val_loader):
                for inputsRGB, inputsCol, targets in val_loader:
                    val_iter += 1
                    val_samples += inputsRGB.size(0)
                    inputVariableRGB = Variable(
                        inputsRGB.permute(1, 0, 2, 3, 4).to(device))
                    inputVariableCol = Variable(
                        inputsCol.permute(1, 0, 2, 3, 4).to(device))
                    labelVariable = Variable(targets.to(device))
                    #labelVariable = Variable(targets.cuda())
                    output_label, _ = model(inputVariableRGB, inputVariableCol,
                                            device)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.to(device)).sum()
            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            # Keep only the best-so-far model (by validation accuracy).
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_' + color +
                                   '_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
                print("saved new best model")

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
    # Wall-clock accounting: setup vs. training time.
    end_time = datetime.datetime.now()
    print('total time elapsed: ', end_time - begin_time)
    print('dataload time: ', dataload_time - begin_time)
    print('training time: ', end_time - dataload_time)
    timers = open((model_folder + '/timings.txt'), 'w')
    timers.write(
        f"total time elapsed: {end_time-begin_time} \ndataload time: {dataload_time-begin_time} \ntraining time: {end_time-dataload_time}"
    )
    timers.close()
def main_run(numEpochs, lr, stepSize, decayRate, trainBatchSize, seqLen,
             evalInterval, evalMode, numWorkers, outDir, modelUsed, pretrained,
             train_test_split, directory, crossValidation, folds):
    """Train a violence-recognition model and report test results.

    Either runs k-fold cross validation (no separate test phase), or a single
    train/validation/test split: the best model from training is saved to disk
    and a confusion matrix over the held-out test set is plotted.

    Args:
        numEpochs/lr/stepSize/decayRate/trainBatchSize/seqLen: training
            hyper-parameters forwarded to modelTrain / kFoldCrossValid.
        evalInterval: validate every this many epochs.
        evalMode: one of 'centerCrop', 'tenCrops', 'fiveCrops', 'horFlip'.
        numWorkers: DataLoader worker count (test loader uses half).
        outDir/modelUsed/pretrained: used to name the experiment folder.
        train_test_split: fraction forwarded to sampleFromClass.
        directory: dataset root passed to make_split.
        crossValidation: if True run k-fold CV instead of a single split.
        folds: number of CV folds.

    Returns:
        True on completion.
    """
    compDataset, classCount, class_names = make_split(directory)

    if crossValidation:
        data, label = compDataset
        kFoldCrossValid(folds, data, label, numEpochs, evalMode, numWorkers,
                        lr, stepSize, decayRate, trainBatchSize, seqLen)
    else:
        (trainDataset, trainLabels), (validationDataset, validationLabels), \
            (testDataset, testLabels) = sampleFromClass(compDataset, classCount,
                                                        train_test_split)
        model, accuracy = modelTrain(modelUsed, pretrained, trainDataset,
                                     trainLabels, validationDataset,
                                     validationLabels, numEpochs, evalInterval,
                                     evalMode, outDir, numWorkers, lr, stepSize,
                                     decayRate, trainBatchSize, seqLen, True)

        '''for printing confusion matrix'''
        # ImageNet normalization statistics.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        normalize = Normalize(mean=mean, std=std)
        if evalMode == 'centerCrop':
            test_spatial_transform = Compose(
                [Scale(256), CenterCrop(224), ToTensor(), normalize])
        elif evalMode == 'tenCrops':
            test_spatial_transform = Compose(
                [Scale(256), TenCrops(size=224, mean=mean, std=std)])
        elif evalMode == 'fiveCrops':
            test_spatial_transform = Compose(
                [Scale(256), FiveCrops(size=224, mean=mean, std=std)])
        elif evalMode == 'horFlip':
            test_spatial_transform = Compose([
                Scale(256),
                CenterCrop(224),
                FlippedImagesTest(mean=mean, std=std)
            ])
        else:
            # BUG FIX: an unknown mode previously crashed later with an
            # UnboundLocalError on test_spatial_transform; fail fast instead.
            raise ValueError('Unknown evalMode: {}'.format(evalMode))

        vidSeqTest = makeDataset(testDataset, testLabels, seqLen=seqLen,
                                 spatial_transform=test_spatial_transform)
        testLoader = torch.utils.data.DataLoader(vidSeqTest,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=int(numWorkers / 2),
                                                 pin_memory=True)
        numTestInstances = vidSeqTest.__len__()
        print('Number of test samples = {}'.format(numTestInstances))

        # Dir for saving models and log files
        modelFolder = './experiments_' + outDir + '_' + modelUsed + '_' + str(
            pretrained)
        savePathClassifier = (modelFolder + '/bestModel.pth')
        torch.save(model.state_dict(), savePathClassifier)

        '''running test samples and printing confusion matrix'''
        model.train(False)
        print('Testing...')
        pred = None
        targ = None
        # Inference only: no autograd graph needed.  The unused label transfer
        # that used the removed `async=True` keyword (SyntaxError on
        # Python 3.7+) has been dropped.
        with torch.no_grad():
            for inputs, targets in testLoader:
                # Loader yields (batch, seq, C, H, W); model expects seq-first.
                if torch.cuda.is_available():
                    inputVariable1 = inputs.permute(1, 0, 2, 3, 4).cuda()
                else:
                    inputVariable1 = inputs.permute(1, 0, 2, 3, 4)
                outputLabel = model(inputVariable1)
                outputProb = torch.nn.Softmax(dim=1)(outputLabel)
                _, predicted = torch.max(outputProb.data, 1)
                if pred is None:
                    pred = predicted.cpu().numpy()
                    targ = targets[0].cpu().numpy()
                else:
                    pred = np.append(pred, predicted.cpu().numpy())
                    targ = np.append(targ, targets[0].cpu().numpy())

        # Compute confusion matrix
        cnf_matrix = confusion_matrix(targ, pred)
        np.set_printoptions(precision=2)

        # Plot non-normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix, classes=class_names,
                              title='Confusion matrix, without normalization')
        plt.savefig(modelFolder + "/no_norm_confusion_matrix.png")

        # Plot normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                              title='Normalized confusion matrix')
        plt.savefig(modelFolder + "/confusion_matrix.png")
    return True
def modelTrain(modelUsed, pretrained, trainDataset, trainLabels,
               validationDataset, validationLabels, numEpochs, evalInterval,
               evalMode, outDir, numWorkers, lr, stepSize, decayRate,
               trainBatchSize, seqLen, plotting):
    """Train a ViolenceModel and validate every ``evalInterval`` epochs.

    Keeps the model whose validation accuracy first exceeds 50% (and each
    subsequent improvement) as ``bestmodel``.  Scalars are logged to
    TensorBoard and to plain-text log files under the experiment folder;
    loss/accuracy curves are optionally plotted.

    Returns:
        (bestmodel, validationAccuracy): best model seen (None if the 50%
        threshold was never beaten) and the most recent validation accuracy
        (0 if no evaluation epoch ever ran).
    """
    # ImageNet normalization statistics.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)

    # Training-time augmentation pipeline.
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(),
        normalize
    ])

    vidSeqTrain = makeDataset(trainDataset, trainLabels,
                              spatial_transform=spatial_transform,
                              seqLen=seqLen)
    # torch iterator to give data in batches of specified size
    trainLoader = torch.utils.data.DataLoader(vidSeqTrain,
                                              batch_size=trainBatchSize,
                                              shuffle=True,
                                              num_workers=numWorkers,
                                              pin_memory=True,
                                              drop_last=True)

    # Deterministic validation-time transform, selected by evalMode.
    if evalMode == 'centerCrop':
        test_spatial_transform = Compose(
            [Scale(256), CenterCrop(224), ToTensor(), normalize])
    elif evalMode == 'tenCrops':
        test_spatial_transform = Compose(
            [Scale(256), TenCrops(size=224, mean=mean, std=std)])
    elif evalMode == 'fiveCrops':
        test_spatial_transform = Compose(
            [Scale(256), FiveCrops(size=224, mean=mean, std=std)])
    elif evalMode == 'horFlip':
        test_spatial_transform = Compose([
            Scale(256),
            CenterCrop(224),
            FlippedImagesTest(mean=mean, std=std)
        ])
    else:
        # BUG FIX: an unknown mode previously crashed later with an
        # UnboundLocalError on test_spatial_transform; fail fast instead.
        raise ValueError('Unknown evalMode: {}'.format(evalMode))

    vidSeqValid = makeDataset(validationDataset, validationLabels,
                              seqLen=seqLen,
                              spatial_transform=test_spatial_transform)
    validationLoader = torch.utils.data.DataLoader(vidSeqValid,
                                                   batch_size=1,
                                                   shuffle=False,
                                                   num_workers=int(numWorkers / 2),
                                                   pin_memory=True)

    numTrainInstances = vidSeqTrain.__len__()
    numValidationInstances = vidSeqValid.__len__()
    print('Number of training samples = {}'.format(numTrainInstances))
    print('Number of validation samples = {}'.format(numValidationInstances))

    # Dir for saving models and log files
    modelFolder = './experiments_' + outDir + '_' + modelUsed + '_' + str(
        pretrained)
    if not os.path.exists(modelFolder):
        os.makedirs(modelFolder)

    # Log files
    writer = SummaryWriter(modelFolder)
    trainLogLoss = open((modelFolder + '/trainLogLoss.txt'), 'a')
    trainLogAcc = open((modelFolder + '/trainLogAcc.txt'), 'a')
    validationLogLoss = open((modelFolder + '/validLogLoss.txt'), 'a')
    validationLogAcc = open((modelFolder + '/validLogAcc.txt'), 'a')

    model = ViolenceModel(modelUsed, pretrained)

    # Only parameters left trainable by the model constructor are optimized.
    trainParams = [params for params in model.parameters()
                   if params.requires_grad]

    model.train(True)
    if torch.cuda.is_available():
        model.cuda()

    lossFn = nn.CrossEntropyLoss()
    optimizerFn = torch.optim.RMSprop(trainParams, lr=lr)
    optimizerFn.zero_grad()
    optimScheduler = torch.optim.lr_scheduler.StepLR(optimizerFn, stepSize,
                                                     decayRate)

    minAccuracy = 50  # only models beating 50% validation accuracy are kept
    train_loss = []
    val_loss = []
    train_acc = []
    val_acc = []
    bestmodel = None
    # BUG FIX: previously unbound at return when numEpochs < evalInterval.
    validationAccuracy = 0

    for epoch in range(numEpochs):
        # NOTE(review): stepping the scheduler at the top of the epoch mirrors
        # the original pre-1.1.0 PyTorch ordering; kept so the LR schedule is
        # unchanged.
        optimScheduler.step()
        epochLoss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.train(True)
        print('Epoch = {}'.format(epoch + 1))
        writer.add_scalar('lr', optimizerFn.param_groups[0]['lr'], epoch + 1)

        for i, (inputs, targets) in enumerate(trainLoader):
            iterPerEpoch += 1
            optimizerFn.zero_grad()
            # Loader yields (batch, seq, C, H, W); model expects seq-first.
            # Variable wrappers dropped — no-ops since PyTorch 0.4.
            if torch.cuda.is_available():
                inputVariable1 = inputs.permute(1, 0, 2, 3, 4).cuda()
                labelVariable = targets.cuda()
            else:
                inputVariable1 = inputs.permute(1, 0, 2, 3, 4)
                labelVariable = targets
            outputLabel = model(inputVariable1)
            loss = lossFn(outputLabel, labelVariable)
            loss.backward()
            optimizerFn.step()
            outputProb = torch.nn.Softmax(dim=1)(outputLabel)
            _, predicted = torch.max(outputProb.data, 1)
            if torch.cuda.is_available():
                numCorrTrain += (predicted == targets.cuda()).sum()
            else:
                numCorrTrain += (predicted == targets).sum()
            epochLoss += loss.item()

        avgLoss = epochLoss / iterPerEpoch
        trainAccuracy = (float(numCorrTrain) * 100) / float(numTrainInstances)
        train_loss.append(avgLoss)
        train_acc.append(trainAccuracy)
        print('Training: Loss = {} | Accuracy = {}% '.format(
            avgLoss, trainAccuracy))
        writer.add_scalar('train/epochLoss', avgLoss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        trainLogLoss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avgLoss))
        trainLogAcc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if (epoch + 1) % evalInterval == 0:
            model.train(False)
            print('Evaluating...')
            validationLossEpoch = 0
            validationIter = 0
            numCorrTest = 0
            # BUG FIX: torch.no_grad() replaces the requires_grad=False
            # Variable wrappers, and non_blocking=True replaces the removed
            # `async=True` keyword (a SyntaxError on Python 3.7+).
            with torch.no_grad():
                for j, (inputs, targets) in enumerate(validationLoader):
                    validationIter += 1
                    if torch.cuda.is_available():
                        inputVariable1 = inputs.permute(1, 0, 2, 3, 4).cuda()
                        labelVariable = targets.cuda(non_blocking=True)
                    else:
                        inputVariable1 = inputs.permute(1, 0, 2, 3, 4)
                        labelVariable = targets
                    outputLabel = model(inputVariable1)
                    validationLoss = lossFn(outputLabel, labelVariable)
                    validationLossEpoch += validationLoss.item()
                    outputProb = torch.nn.Softmax(dim=1)(outputLabel)
                    _, predicted = torch.max(outputProb.data, 1)
                    # batch_size=1: targets[0] is the single label.
                    if torch.cuda.is_available():
                        numCorrTest += (predicted == targets[0].cuda()).sum()
                    else:
                        numCorrTest += (predicted == targets[0]).sum()
            validationAccuracy = (float(numCorrTest) *
                                  100) / float(numValidationInstances)
            avgValidationLoss = validationLossEpoch / validationIter
            val_loss.append(avgValidationLoss)
            val_acc.append(validationAccuracy)
            print('Testing: Loss = {} | Accuracy = {}% '.format(
                avgValidationLoss, validationAccuracy))
            writer.add_scalar('test/epochloss', avgValidationLoss, epoch + 1)
            writer.add_scalar('test/accuracy', validationAccuracy, epoch + 1)
            validationLogLoss.write('valid Loss after {} epochs = {}\n'.format(
                epoch + 1, avgValidationLoss))
            validationLogAcc.write(
                'valid Accuracy after {} epochs = {}%\n'.format(
                    epoch + 1, validationAccuracy))
            if validationAccuracy > minAccuracy:
                # NOTE(review): stores a reference, not a copy — `bestmodel`
                # continues to be updated by later epochs; use copy.deepcopy
                # here if a frozen snapshot is wanted.  Kept as-is to preserve
                # behavior.
                bestmodel = model
                minAccuracy = validationAccuracy

    '''plotting the accuracy and loss curves'''
    if plotting:
        xc = range(1, numEpochs + 1)
        # x positions of the validation points (one per eval epoch).
        xv = [i for i in xc if i % evalInterval == 0]

        plt.figure(1, figsize=(7, 5))
        plt.plot(xc, train_loss)
        plt.plot(xv, val_loss)
        plt.xlabel('num of Epochs')
        plt.ylabel('loss')
        plt.title('train_loss vs val_loss')
        plt.grid(True)
        plt.legend(['train', 'val'])
        # use bmh, classic, ggplot for big pictures
        plt.style.use(['classic'])
        plt.savefig(modelFolder + "/lossCurve.png")

        plt.figure(2, figsize=(7, 5))
        plt.plot(xc, train_acc)
        plt.plot(xv, val_acc)
        plt.xlabel('num of Epochs')
        plt.ylabel('accuracy')
        plt.title('train_acc vs val_acc')
        plt.grid(True)
        plt.legend(['train', 'val'], loc=4)
        plt.style.use(['classic'])
        plt.savefig(modelFolder + "/accuracyCurve.png")

    trainLogAcc.close()
    validationLogAcc.close()
    trainLogLoss.close()
    validationLogLoss.close()
    writer.export_scalars_to_json(modelFolder + "/all_scalars.json")
    writer.close()
    return bestmodel, validationAccuracy
type=int, default=250, help="何epochごとに学習率を減らすか") parser.add_argument('--manual_seed', default=1, type=int, help='Manually set random seed') args = parser.parse_args() return args if __name__ == '__main__': args = opt() #argsの読み出し args.arch = "ResNet-{}".format(args.model_depth) #実行するアーキテクチャを書き込む spatial_transform = Compose([ ToTensor(), #1iterごとに読み込まれる各フレーム(PIL Image)をTensorへ変換する ]) temporal_transform = TemporalRandomCrop4flow() #時間方向の前処理,今回はなし target_transform = ClassLabel() #学習する正解データ,2クラス分類なのでラベル #accuracies=AverageMeter()#各回におけるaccとその平均 model = test_generate_model(args) #モデルの読み込み(pretrainがあれば重みも読み込んでおく) test_data = get_training_set(args, spatial_transform, temporal_transform, target_transform) #データローダに入力するデータセットの作成 test_loader = torch.utils.data.DataLoader(test_data, batch_size=20) pred = [] Y = [] for i, (x, y) in enumerate(test_loader): x = torch.tensor(x).cuda()
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir,
             seqLen, trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor,
             decay_step, memSize, attention):
    """Two-stage training of the RGB attention model on egocentric datasets.

    Stage 1 trains only the LSTM cell and classifier on top of a frozen
    ResNet.  Stage 2 loads the stage-1 weights (``stage1_dict``) and
    additionally fine-tunes selected layer4 convolutions and the final fc.
    The best validation model is saved as model_rgb_state_dict.pth; without
    a validation set a checkpoint is written every 10 epochs.
    """
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    # Dir for saving models and log files
    model_folder = os.path.join('./', out_dir, dataset, 'rgb',
                                'stage' + str(stage))
    if os.path.exists(model_folder):
        # Refuse to overwrite a previous run.
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader (ImageNet normalization statistics).
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(),
        normalize
    ])

    vid_seq_train = makeDataset(train_data_dir,
                                spatial_transform=spatial_transform,
                                seqLen=seqLen, fmt='.png', phase='train')
    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    if val_data_dir is not None:
        vid_seq_val = makeDataset(val_data_dir,
                                  spatial_transform=Compose([
                                      Scale(256),
                                      CenterCrop(224),
                                      ToTensor(),
                                      normalize
                                  ]),
                                  seqLen=seqLen, fmt='.png', phase='test')
        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    train_params = []
    if stage == 1:
        model = attentionModel(num_classes=num_classes, mem_size=memSize,
                               attention=attention)
        model.train(False)
        # Stage 1: freeze everything; lstm_cell and classifier are re-enabled
        # below.
        for params in model.parameters():
            params.requires_grad = False
    else:
        model = attentionModel(num_classes=num_classes, mem_size=memSize,
                               attention=attention)
        model.load_state_dict(torch.load(stage1_dict))
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        # Stage 2: additionally fine-tune selected layer4 convolutions and fc.
        for module in (model.resNet.layer4[0].conv1,
                       model.resNet.layer4[0].conv2,
                       model.resNet.layer4[1].conv1,
                       model.resNet.layer4[1].conv2,
                       model.resNet.layer4[2].conv1,
                       model.resNet.layer4[2].conv2,
                       model.resNet.fc):
            for params in module.parameters():
                params.requires_grad = True
                train_params += [params]
            module.train(True)

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)
    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.Adam(train_params, lr=lr1,
                                    weight_decay=4e-5, eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0

    for epoch in range(numEpochs):
        # NOTE(review): pre-1.1.0 ordering (scheduler step before the epoch)
        # kept on purpose so the LR schedule matches the original run.
        optim_scheduler.step()
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        if stage == 2:
            # Re-enable train mode on the fine-tuned submodules each epoch
            # (model.train(False) during validation switches them off).
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)

        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # (batch, seq, C, H, W) -> (seq, batch, C, H, W); Variable
            # wrappers dropped — no-ops since PyTorch 0.4.
            inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
            labelVariable = targets.cuda()
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += torch.sum(
                predicted == labelVariable.data).data.item()
            epoch_loss += loss.item()

        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(
            epoch + 1, trainAccuracy))

        if val_data_dir is not None:
            if (epoch + 1) % 1 == 0:
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                # BUG FIX: Variable(..., volatile=True) is gone in modern
                # PyTorch and `targets.cuda(async=True)` is a SyntaxError on
                # Python 3.7+; torch.no_grad() + non_blocking=True are the
                # supported replacements.
                with torch.no_grad():
                    for j, (inputs, targets) in enumerate(val_loader):
                        val_iter += 1
                        val_samples += inputs.size(0)
                        inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
                        labelVariable = targets.cuda(non_blocking=True)
                        output_label, _ = model(inputVariable)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += torch.sum(
                            predicted == labelVariable.data).data.item()
                val_accuracy = (numCorr / val_samples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                    epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                    epoch + 1, avg_val_loss))
                val_log_acc.write(
                    'Val Accuracy after {} epochs = {}%\n'.format(
                        epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder +
                                       '/model_rgb_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
        else:
            # No validation set: checkpoint every 10 epochs instead.
            if (epoch + 1) % 10 == 0:
                save_path_model = (model_folder +
                                   '/model_rgb_state_dict_epoch' +
                                   str(epoch + 1) + '.pth')
                torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
def main_run(dataset, stage, root_dir, out_dir, seqLen, trainBatchSize,
             numEpochs, lr1, decay_factor, decay_step, memSize, outPool_size,
             split, evalInterval):
    """Two-stage training of the LSTA attention model with CAM-pooled memory.

    Stage 1 trains only the LSTA cell and classifier; stage 2 resumes from
    the stage-1 "last" checkpoint and additionally fine-tunes layer4 convs
    and the fc layer.  A rolling last_checkpoint_stage<N> is written every
    epoch; the best test accuracy is kept in best_checkpoint_stage<N>.
    """
    # ImageNet normalization statistics.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)

    test_split = split
    c_cam_classes = outPool_size
    best_acc = 0
    # Both stages test with the training batch size (the original code set
    # this identically in two dead if/elif branches of self-assignments).
    testBatchSize = trainBatchSize

    if dataset == 'gtea_61':
        num_classes = 61
    elif dataset == 'gtea_71':
        num_classes = 71
    elif dataset == 'egtea_gaze+':
        num_classes = 106
    else:
        print('Wrong dataset')
        sys.exit()

    dataset_dir = os.path.join(root_dir, dataset)
    model_folder = os.path.join('.', out_dir, dataset, str(test_split))
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)

    # Record the hyper-parameters of this run.
    note_fl = open(model_folder + '/note.txt', 'w')
    note_fl.write('Number of Epochs = {}\n'
                  'lr = {}\n'
                  'Train Batch Size = {}\n'
                  'Sequence Length = {}\n'
                  'Decay steps = {}\n'
                  'Decay factor = {}\n'
                  'Memory size = {}\n'
                  'Memory cam classes = {}\n'.format(numEpochs, lr1,
                                                     trainBatchSize, seqLen,
                                                     decay_step, decay_factor,
                                                     memSize, c_cam_classes))
    note_fl.close()

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    train_log_loss_batch = open((model_folder + '/train_log_loss_batch.txt'),
                                'w')
    test_log_loss = open((model_folder + '/test_log_loss.txt'), 'w')
    test_log_acc = open((model_folder + '/test_log_acc.txt'), 'w')

    # Training-time augmentation pipeline.
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(),
        normalize
    ])

    print('Preparing dataset...')
    if dataset == 'egtea_gaze+':
        trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, \
            testNumFrames = gen_split_egtea_gazePlus(dataset_dir, test_split)
    else:
        trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, \
            testNumFrames, _ = gen_split(dataset_dir, test_split)

    vid_seq_train = makeDataset(trainDatasetF, trainLabels, trainNumFrames,
                                spatial_transform=spatial_transform,
                                fmt='.jpg', seqLen=seqLen)
    print('Number of train samples = {}'.format(vid_seq_train.__len__()))
    # NOTE(review): no shuffle=True here, so training batches arrive in
    # dataset order — confirm this is intentional; kept to preserve behavior.
    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               num_workers=4,
                                               pin_memory=True)

    vid_seq_test = makeDataset(testDatasetF, testLabels, testNumFrames,
                               spatial_transform=Compose([
                                   Scale(256),
                                   CenterCrop(224),
                                   ToTensor(),
                                   normalize
                               ]),
                               fmt='.jpg', seqLen=seqLen)
    print('Number of test samples = {}'.format(vid_seq_test.__len__()))
    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    train_params = []
    if stage == 1:
        model = attentionModel(num_classes=num_classes, mem_size=memSize,
                               c_cam_classes=c_cam_classes)
        model.train(False)
        # Stage 1: freeze everything; lsta_cell and classifier are re-enabled
        # below.
        for params in model.parameters():
            params.requires_grad = False
    elif stage == 2:
        model = attentionModel(num_classes=num_classes, mem_size=memSize,
                               c_cam_classes=c_cam_classes)
        checkpoint_path = os.path.join(
            model_folder, 'last_checkpoint_stage' + str(1) + '.pth.tar')
        if os.path.exists(checkpoint_path):
            print('Loading weights from checkpoint file {}'.format(
                checkpoint_path))
        else:
            print('Checkpoint file {} does not exist'.format(checkpoint_path))
            sys.exit()
        last_checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(last_checkpoint['model_state_dict'])
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        # Stage 2: additionally fine-tune layer4 convolutions and fc.
        for module in (model.resNet.layer4[0].conv1,
                       model.resNet.layer4[0].conv2,
                       model.resNet.layer4[1].conv1,
                       model.resNet.layer4[1].conv2,
                       model.resNet.layer4[2].conv1,
                       model.resNet.layer4[2].conv2,
                       model.resNet.fc):
            for params in module.parameters():
                params.requires_grad = True
                train_params += [params]

    for params in model.lsta_cell.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.Adam(train_params, lr=lr1,
                                    weight_decay=5e-4, eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0

    for epoch in range(numEpochs):
        # NOTE(review): pre-1.1.0 ordering (scheduler step before the epoch)
        # kept on purpose so the LR schedule matches the original run.
        optim_scheduler.step()
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)

        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # (batch, seq, C, H, W) -> (seq, batch, C, H, W); Variable
            # wrappers dropped — no-ops since PyTorch 0.4.
            inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
            labelVariable = targets.cuda()
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            # BUG FIX: .item() keeps the count as a plain int — the original
            # accumulated a tensor, which under old torch integer division
            # made the accuracy below truncate to 0.
            numCorrTrain += (predicted == targets.cuda()).sum().item()
            if train_iter % 10 == 0:
                # BUG FIX: loss.data[0] raises on PyTorch >= 0.5; use .item().
                print('Training loss after {} iterations = {} '.format(
                    train_iter, loss.item()))
                train_log_loss_batch.write(
                    'Training loss after {} iterations = {}\n'.format(
                        train_iter, loss.item()))
                writer.add_scalar('train/iter_loss', loss.item(), train_iter)
            epoch_loss += loss.item()

        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(
            epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(
            epoch + 1, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        # Rolling "last" checkpoint for this stage, written every epoch.
        save_path_model = os.path.join(
            model_folder, 'last_checkpoint_stage' + str(stage) + '.pth.tar')
        save_file = {
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer_fn.state_dict(),
            'best_acc': best_acc,
        }
        torch.save(save_file, save_path_model)

        if (epoch + 1) % evalInterval == 0:
            print('Testing...')
            model.train(False)
            test_loss_epoch = 0
            test_iter = 0
            test_samples = 0
            numCorr = 0
            # BUG FIX: Variable(..., volatile=True) is gone in modern PyTorch
            # and `targets.cuda(async=True)` is a SyntaxError on Python 3.7+;
            # torch.no_grad() + non_blocking=True are the replacements.
            with torch.no_grad():
                for j, (inputs, targets) in enumerate(test_loader):
                    print('testing inst = {}'.format(j))
                    test_iter += 1
                    test_samples += inputs.size(0)
                    inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
                    labelVariable = targets.cuda(non_blocking=True)
                    output_label, _ = model(inputVariable)
                    test_loss = loss_fn(output_label, labelVariable)
                    test_loss_epoch += test_loss.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum().item()
            test_accuracy = (numCorr / test_samples) * 100
            avg_test_loss = test_loss_epoch / test_iter
            print('Test Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_test_loss))
            print('Test Accuracy after {} epochs = {}%'.format(
                epoch + 1, test_accuracy))
            writer.add_scalar('test/epoch_loss', avg_test_loss, epoch + 1)
            writer.add_scalar('test/accuracy', test_accuracy, epoch + 1)
            test_log_loss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_test_loss))
            test_log_acc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, test_accuracy))
            if test_accuracy > best_acc:
                best_acc = test_accuracy
                save_path_model = os.path.join(
                    model_folder,
                    'best_checkpoint_stage' + str(stage) + '.pth.tar')
                save_file = {
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer_fn.state_dict(),
                    'best_acc': best_acc,
                }
                torch.save(save_file, save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    test_log_acc.close()
    train_log_loss_batch.close()
    test_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
def score(self):
    """Run inference over one directory of extracted frames.

    Builds the evaluation transforms from self.opt, splits the frame range
    into clips via the temporal transform, scores each clip with the
    pretrained model, averages the softmax outputs, and prints the top-k
    class results.
    """
    opt = self.opt
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    # Deterministic eval pipeline: resize -> center crop -> tensor ->
    # value scaling -> normalization.
    spatial_transform = Compose([
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        ScaleValue(opt.value_scale),
        normalize,
    ])

    temporal_steps = []
    if opt.sample_t_stride > 1:
        temporal_steps.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_steps.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_steps)

    # One entry per clip: a list of frame indices to load.
    frame_count = get_n_frames(opt.video_jpgs_dir_path)
    frame_indices = temporal_transform(list(range(0, frame_count)))
    spatial_transform.randomize_parameters()

    def image_name_formatter(x):
        return f'image_{x:05d}.jpg'

    loader = VideoLoader(image_name_formatter)
    print('frame_indices', frame_indices)

    model = generate_model(opt)
    model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                  opt.n_finetune_classes)

    video_outputs = []
    for i, frame_indice in enumerate(frame_indices):
        print("%d indice: %s" % (i, str(frame_indice)))
        frames = loader(opt.video_jpgs_dir_path, frame_indice)
        clip = torch.stack([spatial_transform(img) for img in frames],
                           0).permute(1, 0, 2, 3)
        with torch.no_grad():
            print(clip.shape)
            logits = model(torch.unsqueeze(clip, 0))
            output = F.softmax(logits, dim=1).cpu()
        video_outputs.append(output[0])
        del clip

    # Average the per-clip softmax scores into one score vector.
    average_scores = torch.mean(torch.stack(video_outputs), dim=0)

    with opt.annotation_path.open('r') as f:
        data = json.load(f)
    class_to_idx = get_class_labels(data)
    idx_to_class = {label: name for name, label in class_to_idx.items()}
    print(idx_to_class)

    inference_result = inference.get_video_results(average_scores,
                                                   idx_to_class,
                                                   opt.output_topk)
    print(inference_result)
def main(args):
    """Full test-time pipeline for binary video classification.

    Converts the input videos to per-frame JPEGs, extracts clip features
    with pretrained 3D ResNeXt-101 and 3D ResNet-50 backbones (Kinetics
    weights), runs the selected pre-trained classifier head over the
    (fused) features, and writes per-video score plots plus a time/label
    JSON into ``./final_test_results``.
    """
    import os
    import numpy as np
    import sys
    import json
    import torch
    from torch import nn
    from torch import optim
    from torch.optim import lr_scheduler
    from opts import parse_opts
    from mean import get_mean, get_std
    from spatial_transforms import (Compose, Normalize, Scale, CenterCrop,
                                    CornerCrop, MultiScaleCornerCrop,
                                    MultiScaleRandomCrop,
                                    RandomHorizontalFlip, ToTensor)
    from temporal_transforms import LoopPadding, TemporalRandomCrop
    from target_transforms import ClassLabel, VideoID
    from target_transforms import Compose as TargetCompose
    from dataset import get_training_set, get_validation_set, get_test_set
    from utils import Logger
    from train import train_epoch
    from validation import val_epoch
    import test
    import collections
    from sklearn.svm import LinearSVC
    from sklearn.svm import SVC
    from joblib import dump, load
    from sklearn import preprocessing
    from scipy import stats
    from sklearn.metrics import accuracy_score

    # ---- Resolve working directories (created under the CWD if missing).
    local_path = os.getcwd()
    if args.video_directory_path in ["", " ", '', './video', './video/']:
        video_path = local_path + '/video/'
    else:
        video_path = args.video_directory_path
    video_path_jpg = local_path + '/video_jpg/'
    if not os.path.exists(video_path_jpg):
        os.makedirs(video_path_jpg)
    extracted_feature_path = local_path + '/extracted_features'
    if not os.path.exists(extracted_feature_path):
        os.makedirs(extracted_feature_path)
    final_results_path = local_path + '/final_test_results'
    if not os.path.exists(final_results_path):
        os.makedirs(final_results_path)

    # Decode every video into per-frame JPEGs and count frames per clip.
    # NOTE(review): paths are interpolated into a shell command string —
    # paths containing spaces or shell metacharacters will break; consider
    # subprocess.run([...], shell=False).
    os.system('python utils/video_jpg.py' + ' ' + video_path + ' ' +
              video_path_jpg)
    os.system('python utils/n_frames.py' + ' ' + video_path_jpg)

    if args.pretrain_directory_path in ["", " ", '', './pretrain',
                                        './pretrain/']:
        pretrain_directory_path = local_path + '/pretrain'
    else:
        pretrain_directory_path = args.pretrain_directory_path

    # Minimal option bag consumed by the datasets/models below.
    import easydict
    opt = easydict.EasyDict({
        "n_classes": 2,
        "sample_size": 112,
        "sample_duration": 16,
        "batch_size": 16,
        "n_threads": 4,
        "norm_value": 1,
        "resnet_shortcut": 'B',
        "resnext_cardinality": 32,
    })
    opt.root_path = local_path
    opt.video_path = video_path_jpg

    # use two gpu devices on the server, you can customize it depending on
    # how many available gpu devices you have
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    from datasets.no_label_binary import NoLabelBinary

    # Kinetics channel mean; std stays [1, 1, 1], i.e. mean-subtraction only.
    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1, 1, 1])
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    # Targets are video IDs (used later to group clips per video).
    target_transform = VideoID()  # ClassLabel()

    # get test data
    test_data = NoLabelBinary(opt.video_path,
                              None,
                              'testing',
                              0,
                              spatial_transform=spatial_transform,
                              temporal_transform=temporal_transform,
                              target_transform=target_transform,
                              sample_duration=opt.sample_duration)
    # wrap test data
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=False)

    # ### Extract Features
    # ##### 3D ResNeXt-101
    from models import resnext
    # construct model architecture
    model_rxt101 = resnext.resnet101(num_classes=opt.n_classes,
                                     shortcut_type=opt.resnet_shortcut,
                                     cardinality=opt.resnext_cardinality,
                                     sample_size=opt.sample_size,
                                     sample_duration=opt.sample_duration)
    model_rxt101 = model_rxt101.cuda()
    # wrap the current model again in nn.DataParallel / or we can just
    # remove the .module keys.
    model_rxt101 = nn.DataParallel(model_rxt101, device_ids=None)

    ### Load pretrained weight
    # customize the pretrained model path
    pretrain = torch.load(pretrain_directory_path +
                          '/resnext-101-kinetics.pth')
    pretrain_dict = pretrain['state_dict']
    # do not load the last layer since we want to fine-tune it
    pretrain_dict.pop('module.fc.weight')
    pretrain_dict.pop('module.fc.bias')
    model_dict = model_rxt101.state_dict()
    model_dict.update(pretrain_dict)
    model_rxt101.load_state_dict(model_dict)

    # register layer index to extract the features by forwarding all the
    # video clips
    activation = {}

    def get_activation(name):
        # Forward hook that captures the (detached) output of a layer.
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model_rxt101.module.avgpool.register_forward_hook(
        get_activation('avgpool'))
    model_rxt101.eval()

    # forward all the videos to extract features
    avgpool_test = []
    targets_test = []
    with torch.no_grad():
        print("Extract test set features:")
        for i, (inputs, target) in enumerate(test_loader):
            if i % 30 == 0:
                print(i)
            output = model_rxt101(inputs)
            # The hook holds this batch's avgpool activations; flatten to
            # one feature vector per clip.
            avgpool_test.append(activation['avgpool'].view(len(target),
                                                           -1).cpu())
            targets_test.append(target)
    avgpool_test_np = np.concatenate([i.numpy() for i in avgpool_test],
                                     axis=0)
    np.save(opt.root_path + '/extracted_features/resnext101_avgpool_test.npy',
            avgpool_test_np)
    targets_test_np = np.concatenate(np.array(targets_test), axis=0)
    np.save(opt.root_path + '/extracted_features/class_names_test.npy',
            targets_test_np)

    # ##### 3D ResNet-50
    from models import resnet
    # construct model architecture
    model_rt50 = resnet.resnet50(num_classes=opt.n_classes,
                                 shortcut_type=opt.resnet_shortcut,
                                 sample_size=opt.sample_size,
                                 sample_duration=opt.sample_duration)
    model_rt50 = model_rt50.cuda()
    # wrap the current model again in nn.DataParallel / or we can just
    # remove the .module keys.
    model_rt50 = nn.DataParallel(model_rt50, device_ids=None)

    ### Load pretrained weight
    # customize the pretrained model path
    pretrain = torch.load(pretrain_directory_path + '/resnet-50-kinetics.pth')
    pretrain_dict = pretrain['state_dict']
    # do not load the last layer since we want to fine-tune it
    pretrain_dict.pop('module.fc.weight')
    pretrain_dict.pop('module.fc.bias')
    model_dict = model_rt50.state_dict()
    model_dict.update(pretrain_dict)
    model_rt50.load_state_dict(model_dict)

    # register layer index to extract the features by forwarding all the
    # video clips
    activation = {}

    def get_activation(name):
        # Forward hook that captures the (detached) output of a layer.
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model_rt50.module.avgpool.register_forward_hook(get_activation('avgpool'))
    model_rt50.eval()

    # forward all the videos to extract features
    avgpool_test = []
    with torch.no_grad():
        print("Extract test set features:")
        for i, (inputs, target) in enumerate(test_loader):
            if i % 30 == 0:
                print(i)
            output = model_rt50(inputs)
            avgpool_test.append(activation['avgpool'].view(len(target),
                                                           -1).cpu())
    # save the features
    avgpool_test_np = np.concatenate([i.numpy() for i in avgpool_test],
                                     axis=0)
    np.save(opt.root_path + '/extracted_features/resnet50_avgpool_test.npy',
            avgpool_test_np)

    # ### Load & fuse the features
    x_test_1 = np.load(opt.root_path +
                       '/extracted_features/resnext101_avgpool_test.npy')
    x_test_2 = np.load(opt.root_path +
                       '/extracted_features/resnet50_avgpool_test.npy')
    # Fused representation: ResNeXt-101 and ResNet-50 features side by side.
    x_test = np.concatenate([x_test_1, x_test_2], axis=1)
    y_test = np.load(opt.root_path +
                     '/extracted_features/class_names_test.npy')

    # ### Load Classification head and predict
    # Each homework/final variant uses its own saved head and feature set.
    if args.model == 'hw4':
        # hw4 best model
        clf = load('./hw6_results/logistic2_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test_2)
        y_pred_test_prob_raw = clf.predict_proba(x_test_2)
    elif args.model == 'hw5':
        # hw5 best model
        clf = load('./hw6_results/logistic_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test)
        y_pred_test_prob_raw = clf.predict_proba(x_test)
    elif args.model == 'hw6':
        # hw6 best model
        clf = load('./hw6_results/logistic1_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test_1)
        y_pred_test_prob_raw = clf.predict_proba(x_test_1)
    elif args.model == 'hw8':
        # hw8 best model
        clf = load('./hw8_results/logistic_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test)
        y_pred_test_prob_raw = clf.predict_proba(x_test)
    elif args.model == 'final':
        # Final best model
        clf = load('./hw8_results/logistic1_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test_1)
        y_pred_test_prob_raw = clf.predict_proba(x_test_1)

    # Clips of the same video are contiguous in y_test; record the index
    # where each new video starts so predictions can be grouped per video.
    split_idx = []
    for idx, y_name in enumerate(y_test):
        if idx == 0 or y_name != y_test[idx - 1]:
            split_idx.append(idx)
    split_idx.append(len(y_test))
    y_pred_test, y_pred_test_prob, y_pred_test_final = {}, {}, {}
    for i, split in enumerate(split_idx):
        if i < len(split_idx) - 1:
            y_pred_test[y_test[split]] = y_pred_test_raw[split:split_idx[i + 1]]
            y_pred_test_prob[y_test[split]] = \
                y_pred_test_prob_raw[split:split_idx[i + 1]]
            # Per-video label: argmax of the clip-averaged probabilities.
            y_pred_test_final[y_test[split]] = np.argmax(
                np.mean(y_pred_test_prob_raw[split:split_idx[i + 1]], axis=0))

    # ### Get the length (in seconds) of each video clip
    tvns = list(y_pred_test_final.keys())
    mp4_path = video_path
    clip_duration_dict = {}
    from moviepy.editor import VideoFileClip
    i = 0
    for tvn in tvns:
        i += 1
        if i % 100 == 0:
            print(i)
        clip = VideoFileClip(os.path.join(mp4_path, tvn + ".mp4"))
        clip_duration_dict[tvn] = [clip.duration]

    # ### Generate Figures
    import matplotlib.pyplot as plt
    for tvn in clip_duration_dict:
        # One timestamp per clip, evenly spread over the video duration.
        interval = clip_duration_dict[tvn][0] / list(y_test).count(tvn)
        x = np.arange(0, clip_duration_dict[tvn][0], interval) + interval
        y_idx = np.argmax(y_pred_test_prob[tvn], 1)
        # Probability of the positive class over time.
        y = y_pred_test_prob[tvn][:, 1]
        x = x[:len(y)]
        plt.plot(x, y)
        plt.ylim([-0.1, 1.1])
        plt.xlabel('time/sec')
        plt.ylabel('pred score for ground truth label')
        plt.title("Ground Truth Label: " + tvn + "\n Model Avg. " +
                  "Predict Score: " + str(np.mean(y)))
        # str(real_prediction_dict[tvn]['score'])
        plt.savefig(opt.root_path + "/final_test_results/" + tvn + '_' +
                    args.model + "_UIN-625007598",
                    bbox_inches='tight')
        plt.close()

    # ### Generate Json
    timeTrueLabel = {}
    for tvn in clip_duration_dict:
        if tvn in y_pred_test_prob:
            interval = clip_duration_dict[tvn][0] / list(y_test).count(tvn)
            x = np.arange(0, clip_duration_dict[tvn][0], interval) + interval
            y_idx = np.argmax(y_pred_test_prob[tvn], 1)
            y = y_pred_test_prob[tvn][:, 1]
            x = x[:len(y)]
            # [timestamp, score] pairs per video.
            timeTrueLabel[tvn] = [[str(time), str(y[idx])]
                                  for idx, time in enumerate(x)]
    with open(
            opt.root_path + '/final_test_results/timeLabel_' + args.model +
            '_UIN-625007598.json', 'w') as fp:
        json.dump(timeTrueLabel, fp)
import torch
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import random
import glob
import sys
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale,
                                Normalize, MultiScaleCornerCrop,
                                RandomHorizontalFlip)

# ImageNet channel statistics used to normalize RGB frames.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize = Normalize(mean=mean, std=std)
# Low-resolution (7x7) pipeline — presumably for attention/supervision
# maps; verify against callers.
spatial_transform2 = Compose([Scale((7, 7)), ToTensor()])


def listDirectory(path):
    # Return the entries of *path*, or [] when it is not a directory.
    # NOTE(review): relies on `os`, which is absent from the import block
    # above — confirm an `import os` exists elsewhere in this file.
    if os.path.isdir(path):
        return os.listdir(path)
    return []


def gen_split(root_dir, stackSize):
    # Accumulators for the per-video file lists, labels, and frame counts.
    DatasetF = []
    Labels = []
    NumFrames = []
    # The root directory should be processed frames/train or test
    for dir_user in sorted(os.listdir(root_dir)):
def main_run(stage, train_data_dir, val_data_dir, stage1Dict, stage1Dict_rgb,
             stage1Dict_fc, out_dir, seqLen, trainBatchSize, valBatchSize,
             numEpochs, lr1, decay_factor, decay_step, memSize):
    """Train the two-stream (RGB + flow-colorization) GTEA-61 model.

    Fine-tunes the fusion classifier, both ConvLSTM cells, and the last
    residual blocks of each stream's ResNet, starting from the given
    stage-1 checkpoints. Train/val metrics are logged to TensorBoard and
    text files; the weights with the best validation accuracy are saved.

    Parameters mirror the CLI options: data dirs, stage-1 checkpoints for
    each stream, output dir, sequence length, batch sizes, epoch count,
    base learning rate, step-decay settings, and ConvLSTM memory size.
    """
    #dataset = 'gtea61'
    num_classes = 61

    # Dir for saving models and log files; refuse to clobber existing runs.
    model_folder = os.path.join('./', out_dir, 'attConvLSTMDoubleResnet',
                                str(seqLen), 'stage' + str(stage))
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loaders: multi-scale augmentation for train, center crop for val.
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])
    vid_seq_train = makeDataset(train_data_dir,
                                seqLen=seqLen,
                                fmt='.png',
                                users=['S1', 'S3', 'S4'],
                                spatial_transform=spatial_transform)
    trainInstances = vid_seq_train.__len__()
    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)
    if val_data_dir is not None:
        vid_seq_val = makeDataset(val_data_dir,
                                  seqLen=seqLen,
                                  fmt='.png',
                                  users=['S2'],
                                  train=False,
                                  spatial_transform=Compose([
                                      Scale(256),
                                      CenterCrop(224),
                                      ToTensor(), normalize
                                  ]))
        valInstances = vid_seq_val.__len__()
        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    model = twoStreamFlowCol(num_classes=num_classes,
                             memSize=memSize,
                             frameModel=stage1Dict_rgb,
                             flowModel=stage1Dict_fc)
    # Freeze everything, then selectively re-enable the fine-tuned modules.
    model.train(False)
    for params in model.parameters():
        params.requires_grad = False

    def _unfreeze(module):
        # Mark *module*'s parameters trainable and return them as a list.
        enabled = []
        for p in module.parameters():
            p.requires_grad = True
            enabled.append(p)
        return enabled

    # FIX: the classifier parameters were registered twice in the original
    # train_params list, which made SGD apply its update to them twice per
    # step (an effective doubled learning rate). Each module is now added
    # exactly once. layer4[2].conv2 and resNet.fc stay frozen, as before.
    train_params = []
    train_params += _unfreeze(model.classifier)
    train_params += _unfreeze(model.frameModel.lstm_cell)
    train_params += _unfreeze(model.frameModel.resNet.layer4[0].conv1)
    train_params += _unfreeze(model.frameModel.resNet.layer4[0].conv2)
    train_params += _unfreeze(model.frameModel.resNet.layer4[1].conv1)
    train_params += _unfreeze(model.frameModel.resNet.layer4[1].conv2)
    train_params += _unfreeze(model.frameModel.resNet.layer4[2].conv1)
    train_params += _unfreeze(model.flowModel.lstm_cell)
    train_params += _unfreeze(model.flowModel.resNet.layer4[0].conv1)
    train_params += _unfreeze(model.flowModel.resNet.layer4[0].conv2)
    train_params += _unfreeze(model.flowModel.resNet.layer4[1].conv1)
    train_params += _unfreeze(model.flowModel.resNet.layer4[1].conv2)
    train_params += _unfreeze(model.flowModel.resNet.layer4[2].conv1)

    model.cuda()

    trainSamples = vid_seq_train.__len__()
    min_accuracy = 0
    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.SGD(train_params,
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)
    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn,
                                                      step_size=decay_step,
                                                      gamma=decay_factor)
    train_iter = 0
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        for inputs, inputsSN, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # Loaders yield (batch, seq, C, H, W); the model wants seq first.
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            inputSNVariable = Variable(inputsSN.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable, inputSNVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            epoch_loss += loss.item()
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if val_data_dir is not None:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            with torch.no_grad():
                for inputs, inputsSN, targets in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)
                    inputVariable = Variable(
                        inputs.permute(1, 0, 2, 3, 4).cuda())
                    inputSNVariable = Variable(
                        inputsSN.permute(1, 0, 2, 3, 4).cuda())
                    # FIX: `targets.cuda(async=True)` is a SyntaxError on
                    # Python >= 3.7 — `non_blocking` is the supported kwarg.
                    labelVariable = Variable(targets.cuda(non_blocking=True))
                    output_label, _ = model(inputVariable, inputSNVariable)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum()
            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            # Checkpoint whenever validation accuracy improves.
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
def main_run(stage, model, supervision, train_data_dir, val_data_dir,
             stage1_dict, out_dir, seqLen, trainBatchSize, valBatchSize,
             numEpochs, lr1, lr_suphead, lr_resnet, alpha, decay_factor,
             decay_step, lossSupervision, memSize):
    """Train an RGB egocentric-action model (GTEA-61) with optional
    spatial supervision maps.

    ``model`` arrives as a string naming the architecture and is rebound
    to the instantiated network. ``stage`` selects the trainable groups:
    0 = whole ResNet backbone, 1 = heads only (supervision disabled),
    otherwise = last residual blocks + heads, initialized from
    ``stage1_dict``. Metrics go to TensorBoard and text logs under
    ``out_dir/rgb/stage<stage>``; the best-validation weights (or
    periodic snapshots when no val set is given) are checkpointed.
    """
    num_classes = 61

    # Instantiate the requested architecture; `model` is rebound from the
    # architecture name to the network itself.
    if model == 'ConvLSTMAttention':
        model = ConvLSTMAttention(num_classes=num_classes,
                                  mem_size=memSize,
                                  supervision=supervision,
                                  loss_supervision=lossSupervision)
    elif model == 'ConvLSTM':
        model = ConvLSTM(num_classes=num_classes,
                         mem_size=memSize,
                         supervision=supervision,
                         loss_supervision=lossSupervision)
    elif model == 'SupervisedLSTMMod':
        model = SupervisedLSTMMod(num_classes=num_classes,
                                  mem_size=memSize,
                                  supervision=supervision,
                                  loss_supervision=lossSupervision)
    elif model == 'MyNetIDT':
        model = MyNetIDT(num_classes=num_classes,
                         mem_size=memSize,
                         supervision=supervision,
                         loss_supervision=lossSupervision)
    else:
        print('Model not found')
        sys.exit()

    model_folder = os.path.join(
        './', out_dir, 'rgb',
        'stage' + str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])
    # Same augmentation for the supervision maps, downsampled to 7x7 —
    # presumably to match the supervision-head output resolution (verify).
    spatial_transform_map = Cp([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        Resize((7, 7)),
        TT()
    ])
    spatial_transform_map_2 = Cp([Resize((7, 7)), TT()])

    vid_seq_train = makeDataset_supervision(
        train_data_dir,
        train=True,
        spatial_transform=spatial_transform,
        spatial_transform_map=spatial_transform_map,
        seqLen=seqLen,
        fmt='.png')
    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=8,
                                               pin_memory=True)
    if val_data_dir is not None:
        vid_seq_val = makeDataset_supervision(
            val_data_dir,
            train=False,
            spatial_transform_map=spatial_transform_map_2,
            spatial_transform=Compose(
                [Scale(256), CenterCrop(224),
                 ToTensor(), normalize]),
            seqLen=seqLen,
            fmt='.png')
        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=8,
                                                 pin_memory=True)
        valInstances = vid_seq_val.__len__()
    trainInstances = vid_seq_train.__len__()

    # Three parameter groups with separate learning rates:
    #   train_params  -> ResNet layers        (lr_resnet)
    #   train_params3 -> supervision head     (lr_suphead)
    #   train_params2 -> LSTM cell+classifier (lr1)
    train_params = []
    train_params3 = []
    train_params2 = []
    if stage == 0:
        # Stage 0: the whole ResNet backbone is trainable.
        for params in model.resNet.parameters():
            params.requires_grad = True
            train_params += [params]
        if stage1_dict is not None:
            model.load_state_dict(torch.load(stage1_dict))
    elif stage == 1:
        # Stage 1: backbone frozen and supervision disabled.
        supervision = False
        model.eval()
        for params in model.parameters():
            params.requires_grad = False
    else:
        # Later stages: start from stage-1 weights and fine-tune only the
        # last residual blocks (plus the heads enabled below).
        model.load_state_dict(torch.load(stage1_dict))
        model.train()
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        # (layer4[2].conv2 and resNet.fc were deliberately left frozen.)

    model.resNet.layer4[0].conv1.train(True)
    model.resNet.layer4[0].conv2.train(True)
    model.resNet.layer4[1].conv1.train(True)
    model.resNet.layer4[1].conv2.train(True)
    model.resNet.layer4[2].conv1.train(True)
    model.resNet.layer4[2].conv2.train(True)
    model.resNet.fc.train(True)
    model.sup_head.train()
    # The LSTM cell, classifier and supervision head train in every stage.
    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params2 += [params]
    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params2 += [params]
    for params in model.sup_head.parameters():
        params.requires_grad = True
        train_params3 += [params]
    model.lstm_cell.train()
    model.classifier.train()
    model.cuda()

    # Supervision loss: pixel-wise classification or L1 regression on maps.
    if lossSupervision == "classification":
        loss_sup = nn.CrossEntropyLoss()
    elif lossSupervision == "regression":
        loss_sup = nn.L1Loss()
    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.Adam([{
        "params": train_params,
        "lr": lr_resnet
    }, {
        "params": train_params3,
        "lr": lr_suphead
    }, {
        "params": train_params2,
        "lr": lr1
    }],
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        epoch_loss_ = 0
        loss_ = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        # Re-assert train mode on the stage-dependent trainable modules
        # (validation below switches the model to eval each epoch).
        if stage == 0:
            model.train()
        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.sup_head.train()
            model.resNet.fc.train(True)
        for i, (inputs, targets, maps) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # Flatten (seq, batch) into one leading dim so the maps align
            # with the per-frame supervision-head output.
            if lossSupervision == "classification":
                # Binarize map values to integer class targets.
                maps = torch.ceil(maps)
                maps = maps.type(torch.LongTensor)
                maps = maps.permute(1, 0, 2, 3, 4).squeeze(2).cuda()
                maps = maps.reshape(maps.shape[0] * maps.shape[1],
                                    maps.shape[2], maps.shape[3])
            else:
                maps = maps.permute(1, 0, 2, 3, 4).cuda()
                maps = maps.reshape(maps.shape[0] * maps.shape[1],
                                    maps.shape[2], maps.shape[3],
                                    maps.shape[4])
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, _, output_super = model(inputVariable)
            if supervision == True:
                # Auxiliary supervision loss, backpropagated first;
                # retain_graph keeps activations for the main loss below.
                loss_ = alpha * loss_sup(output_super, maps)
                loss_.backward(retain_graph=True)
                epoch_loss_ += loss_.data.item()
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            epoch_loss += loss.data.item()
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain / float(trainSamples)) * 100
        avg_loss_ = epoch_loss_ / float(iterPerEpoch)
        print(
            'Train: Epoch = {} | Loss = {} | Accuracy = {} | supervision_loss {}'
            .format(epoch + 1, avg_loss, trainAccuracy, avg_loss_))
        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(
            epoch + 1, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        if val_data_dir is not None:
            # Validate every epoch.
            if (epoch + 1) % 1 == 0:
                model.eval()
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                for j, (inputs, targets, _) in enumerate(val_loader):
                    val_iter += 1
                    val_samples += inputs.size(0)
                    with torch.no_grad():
                        inputVariable = Variable(
                            inputs.permute(1, 0, 2, 3, 4).cuda())
                        labelVariable = Variable(
                            targets.cuda(non_blocking=True))
                        output_label, _, _ = model(inputVariable)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.data.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += (predicted == targets.cuda()).sum()
                val_accuracy = (numCorr / float(val_samples)) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('val: Epoch = {} | Loss = {} | Accuracy = {} '.format(
                    epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                    epoch + 1, avg_val_loss))
                val_log_acc.write(
                    'Val Accuracy after {} epochs = {}%\n'.format(
                        epoch + 1, val_accuracy))
                # Checkpoint whenever validation accuracy improves.
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder +
                                       '/model_rgb_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
        else:
            # No validation set: snapshot every 10 epochs instead.
            if (epoch + 1) % 10 == 0:
                save_path_model = (model_folder +
                                   '/model_rgb_state_dict_epoch' +
                                   str(epoch + 1) + '.pth')
                torch.save(model.state_dict(), save_path_model)
    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
def get_train_utils(opt, model_parameters):
    """Assemble everything needed for training.

    Builds the augmented training DataLoader (optionally distributed),
    epoch/batch loggers on the master node, an SGD optimizer over
    *model_parameters*, and the learning-rate scheduler selected by
    ``opt.lr_scheduler``.

    Returns the 6-tuple ``(train_loader, train_sampler, train_logger,
    train_batch_logger, optimizer, scheduler)``.
    """
    assert opt.train_crop in ['random', 'corner', 'center']

    # --- spatial augmentation pipeline ---
    transforms = []
    if opt.train_crop == 'random':
        transforms.append(
            RandomResizedCrop(
                opt.sample_size, (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        # Five geometric scales, each 2^(-1/4) smaller than the previous.
        ratio = 1 / (2**(1 / 4))
        scales = [1.0]
        while len(scales) < 5:
            scales.append(scales[-1] * ratio)
        transforms.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        transforms.append(Resize(opt.sample_size))
        transforms.append(CenterCrop(opt.sample_size))
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        transforms.append(RandomHorizontalFlip())
    if opt.colorjitter:
        transforms.append(ColorJitter())
    transforms.append(ToTensor())
    if opt.input_type == 'flow':
        # Optical-flow input only carries two meaningful channels.
        transforms.append(PickFirstChannels(n=2))
    transforms.append(ScaleValue(opt.value_scale))
    transforms.append(normalize)
    spatial_transform = Compose(transforms)

    # --- temporal sampling pipeline ---
    assert opt.train_t_crop in ['random', 'center']
    t_transforms = []
    if opt.sample_t_stride > 1:
        t_transforms.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        t_transforms.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        t_transforms.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(t_transforms)

    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    # Shuffling is delegated to the sampler in the distributed case.
    train_sampler = (
        torch.utils.data.distributed.DistributedSampler(train_data)
        if opt.distributed else None)
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=opt.batch_size,
        shuffle=(train_sampler is None),
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=train_sampler,
        worker_init_fn=worker_init_fn)

    # Only the master node writes log files.
    if opt.is_master_node:
        train_logger = Logger(opt.result_path / 'train.log',
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            opt.result_path / 'train_batch.log',
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    else:
        train_logger = None
        train_batch_logger = None

    # Nesterov momentum requires zero dampening.
    dampening = 0 if opt.nesterov else opt.dampening
    optimizer = SGD(model_parameters,
                    lr=opt.learning_rate,
                    momentum=opt.momentum,
                    dampening=dampening,
                    weight_decay=opt.weight_decay,
                    nesterov=opt.nesterov)

    assert opt.lr_scheduler in ['plateau', 'multistep']
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=opt.plateau_patience)
    else:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             opt.multistep_milestones)

    return (train_loader, train_sampler, train_logger, train_batch_logger,
            optimizer, scheduler)
# --- Training-set pipeline (script-level fragment) ---
assert opt.train_crop in ['random', 'corner', 'center']
if opt.train_crop == 'random':
    spatial_crop_method = MultiScaleRandomCrop(opt.scales, opt.frame_size)
elif opt.train_crop == 'corner':
    spatial_crop_method = MultiScaleCornerCrop(opt.scales, opt.frame_size)
elif opt.train_crop == 'center':
    # Corner-crop restricted to the center position only.
    spatial_crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.frame_size,
                                               crop_positions=['c'])
spatial_transform = Compose([
    spatial_crop_method,
    RandomHorizontalFlip(),
    ToTensor(opt.norm_value), normalize
])
training_data = get_training_set(opt, spatial_transform, temporal_transform)
train_loader = DataLoaderX(training_data,
                           batch_size=opt.batch_size,
                           shuffle=True,
                           num_workers=opt.n_threads,
                           pin_memory=True,
                           drop_last=True)
train_logger = Logger(os.path.join(opt.save_path, 'train.log'),
                      ['epoch', 'loss', 'final_mAP', 'lr'])
# NOTE(review): this assignment comes after `temporal_transform` was already
# consumed by get_training_set above — confirm the intended statement order.
temporal_transform = TemporalSegmentCenterCrop(opt.segment_number,
                                               opt.sample_duration)
def main_run(dataset, flowModel_state_dict, RGBModel_state_dict, dataset_dir,
             stackSize, seqLen, memSize, numSeg):
    """Evaluate a two-stream model (optical-flow ResNet + RGB attention model)
    on the test split and plot a normalized confusion matrix.

    Args:
        dataset: one of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'.
        flowModel_state_dict: path to the saved flow-model state dict.
        RGBModel_state_dict: path to the saved RGB attention-model state dict.
        dataset_dir: root directory of the test frames/flow images.
        stackSize: number of stacked flow frames (2 channels each).
        seqLen: RGB sequence length per clip.
        memSize: ConvLSTM memory size of the attention model.
        numSeg: number of segments sampled per video.

    Raises:
        ValueError: if `dataset` is not a recognized name (previously this
            left `num_classes` unbound and crashed later).
    """
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        raise ValueError('Unknown dataset: {}'.format(dataset))

    # ImageNet statistics — must match the training-time normalization.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    flow_wt = 0.5  # late-fusion weight for the flow stream
    testBatchSize = 1
    sequence = True
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=sequence,
                               numSeg=numSeg,
                               stackSize=stackSize,
                               fmt='.jpg',
                               phase='Test',
                               seqLen=seqLen)
    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    modelFlow = flow_resnet34(False,
                              channels=2 * stackSize,
                              num_classes=num_classes)
    modelFlow.load_state_dict(torch.load(flowModel_state_dict))
    modelRGB = attentionModel(num_classes=num_classes, mem_size=memSize)
    modelRGB.load_state_dict(torch.load(RGBModel_state_dict))

    # Freeze both streams and switch to eval mode for inference.
    for params in modelFlow.parameters():
        params.requires_grad = False
    for params in modelRGB.parameters():
        params.requires_grad = False
    modelFlow.train(False)
    modelRGB.train(False)
    modelFlow.cuda()
    modelRGB.cuda()

    test_samples = len(vid_seq_test)
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0
    true_labels = []
    predicted_labels = []
    # torch.no_grad() replaces the removed `Variable(..., volatile=True)`
    # API while preserving the no-autograd evaluation semantics.
    with torch.no_grad():
        for inputFlow, inputFrame, targets in test_loader:
            inputVariableFlow = inputFlow[0].cuda()
            # Reorder to (seq, batch, C, H, W) as the RGB model expects.
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).cuda()
            output_labelFlow, _ = modelFlow(inputVariableFlow)
            output_labelFrame, _ = modelRGB(inputVariableFrame)
            # Average the flow-segment logits, then fuse the two streams.
            output_label_meanFlow = torch.mean(output_labelFlow.data, 0, True)
            output_label_meanTwoStream = (flow_wt * output_label_meanFlow) + (
                (1 - flow_wt) * output_labelFrame.data)
            _, predictedTwoStream = torch.max(output_label_meanTwoStream, 1)
            # Compare on CPU: `targets` stays on the host while predictions
            # live on the GPU (cross-device == raises in modern PyTorch).
            numCorrTwoStream += int(
                (predictedTwoStream.cpu() == targets[0]).sum().item())
            # Store plain ints so sklearn's confusion_matrix gets 1-D data.
            true_labels.append(int(targets[0]))
            predicted_labels.append(int(predictedTwoStream))

    test_accuracyTwoStream = (numCorrTwoStream / test_samples) * 100
    print('Test Accuracy = {}'.format(test_accuracyTwoStream))

    # Normalize each confusion-matrix row to per-class recall.
    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]
    ticks = np.linspace(0, 60, num=61)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-twoStream.jpg', bbox_inches='tight')
    plt.show()
def main_run(model_state_dict, dataset_dir, seqLen, memSize, out_dir):
    """Evaluate a saved attention ConvLSTM model on the GTEA-61 test user
    (S2), write the accuracy to a log file, and plot the confusion matrix.

    Args:
        model_state_dict: path to the saved model state dict.
        dataset_dir: root directory of the dataset frames.
        seqLen: number of frames per clip.
        memSize: ConvLSTM memory size.
        out_dir: output directory root for logs/plots.
    """
    model_folder = os.path.join('./', out_dir, 'attConvLSTM', str(seqLen))
    num_classes = 61  # GTEA-61

    # ImageNet statistics — must match the training-time normalization.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               seqLen=seqLen,
                               fmt='.png',
                               train=False,
                               spatial_transform=spatial_transform,
                               users=['S2'])
    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    model = attentionModel(num_classes=num_classes, mem_size=memSize)
    model.load_state_dict(torch.load(model_state_dict))
    # Freeze and switch to eval mode for inference.
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)
    model.cuda()

    test_samples = len(vid_seq_test)
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []
    with torch.no_grad():
        for inputs, targets in test_loader:
            # Reorder to (seq, batch, C, H, W) as the model expects.
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            output_label, _ = model(inputVariable)
            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.cuda()).sum()
            # Store plain ints so sklearn's confusion_matrix gets 1-D data
            # (batch size is 1, so each tensor holds a single label).
            true_labels.append(int(targets))
            predicted_labels.append(int(predicted))

    test_accuracy = torch.true_divide(numCorr, test_samples) * 100
    test_accuracy = 'Test Accuracy = {}%'.format(test_accuracy)
    print(test_accuracy)
    # Ensure the output dir exists (it normally does after training) and use
    # a context manager so the file is closed even if the write fails.
    os.makedirs(model_folder, exist_ok=True)
    with open(model_folder + "/test_log_acc.txt", "w") as fil:
        fil.write(test_accuracy)

    # Normalize each confusion-matrix row to per-class recall.
    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]
    ticks = np.linspace(0, 60, num=61)
    plt.figure(1, figsize=(12, 12), dpi=100.0)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    xy = np.arange(start=0, stop=61)
    plt.plot(xy, xy)  # diagonal reference line
    plt.savefig(model_folder + '/cnf_matrix_normalized.png',
                bbox_inches='tight')
    plt.show()
def main_run(dataset, trainDir, valDir, outDir, stackSize, trainBatchSize,
             valBatchSize, numEpochs, lr1, decay_factor, decay_step,
             uniform_sampling, debug):
    """Train the optical-flow ResNet-34 stream on GTEA-61 and validate each
    epoch, saving the best-accuracy checkpoint.

    Args:
        dataset: dataset name, used only to build the output folder path.
        trainDir: dataset root used for BOTH train and validation splits.
        valDir: NOTE(review) — unused; the validation set below is built from
            `trainDir`. Confirm whether this is intentional (user-split data
            in one tree) or a bug.
        outDir: root output directory for checkpoints and logs.
        stackSize: number of stacked flow frames (2 channels each).
        trainBatchSize / valBatchSize: loader batch sizes.
        numEpochs: number of training epochs.
        lr1: initial SGD learning rate.
        decay_factor / decay_step: MultiStepLR gamma and milestones.
        uniform_sampling: frame-sampling strategy flag passed to the dataset.
        debug: if truthy, run on CPU with no loader workers.
    """
    # GTEA 61
    num_classes = 61
    # Train/Validation/Test split by user id.
    train_splits = ["S1", "S3", "S4"]
    val_splits = ["S2"]
    if debug:
        n_workers = 0
        device = 'cpu'
    else:
        n_workers = 4
        device = 'cuda'
    min_accuracy = 0
    # Dir for saving models and log files; refuse to overwrite an existing run.
    model_folder = os.path.join('./', outDir, dataset, 'flow')
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files (closed at the bottom of the function).
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loaders: train uses multi-scale corner crops + hflip, val uses a
    # deterministic center crop. Normalization is ImageNet statistics.
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])
    vid_seq_train = makeDataset(trainDir,
                                train_splits,
                                spatial_transform=spatial_transform,
                                sequence=False,
                                stackSize=stackSize,
                                fmt='.png',
                                uniform_sampling=uniform_sampling)
    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               sampler=None,
                                               num_workers=n_workers,
                                               pin_memory=True)
    # NOTE(review): uses trainDir, not valDir — see docstring.
    vid_seq_val = makeDataset(trainDir,
                              val_splits,
                              spatial_transform=Compose([
                                  Scale(256),
                                  CenterCrop(224),
                                  ToTensor(), normalize
                              ]),
                              sequence=False,
                              stackSize=stackSize,
                              fmt='.png',
                              phase='Test',
                              uniform_sampling=uniform_sampling)
    val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                             batch_size=valBatchSize,
                                             shuffle=False,
                                             num_workers=n_workers,
                                             pin_memory=True)
    valInstances = vid_seq_val.__len__()
    trainInstances = vid_seq_train.__len__()
    print('Number of samples in the dataset: training = {} | validation = {}'.
          format(trainInstances, valInstances))

    # Pretrained flow ResNet-34; all parameters are trained.
    model = flow_resnet34(True,
                          channels=2 * stackSize,
                          num_classes=num_classes)
    model.train(True)
    train_params = list(model.parameters())
    model.to(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.SGD(train_params,
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.train(True)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = inputs.to(device)
            labelVariable = targets.to(device)
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(device)).sum()
            epoch_loss += loss.data.item()
        # Scheduler stepped once per epoch, after the optimizer steps
        # (correct post-1.1.0 PyTorch ordering).
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.data.item() / trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        # Validate every epoch ((epoch + 1) % 1 == 0 is always true; the
        # modulus is kept as a tunable validation interval).
        if (epoch + 1) % 1 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            # NOTE(review): no torch.no_grad() here — validation builds
            # autograd graphs and wastes memory; consider wrapping this loop.
            for j, (inputs, targets) in enumerate(val_loader):
                val_iter += 1
                val_samples += inputs.size(0)
                inputVariable = inputs.to(device)
                labelVariable = targets.to(device)
                output_label, _ = model(inputVariable)
                val_loss = loss_fn(output_label, labelVariable)
                val_loss_epoch += val_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == targets.to(device)).sum()
            val_accuracy = (numCorr.data.item() / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            # Keep the best-accuracy checkpoint; otherwise snapshot every
            # 10th epoch.
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_flow_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
            else:
                if (epoch + 1) % 10 == 0:
                    save_path_model = (model_folder +
                                       '/model_flow_state_dict_epoch' +
                                       str(epoch + 1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)
    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
def main_run(version, flowModel, rgbModel, stackSize, seqLen, memSize,
             trainDatasetDir, outDir, trainBatchSize, valBatchSize, lr1,
             numEpochs, decay_step, decay_factor):
    """Second-stage training of the joint two-stream attention model on
    GTEA-61: only the classifier, the frame-model LSTM/layer4/fc, and the
    flow-model layer4 are fine-tuned; everything else stays frozen.

    Args:
        version: run name used to build the output folder.
        flowModel / rgbModel: paths to the pretrained single-stream weights.
        stackSize: number of stacked flow frames.
        seqLen: RGB sequence length per clip.
        memSize: ConvLSTM memory size of the frame model.
        trainDatasetDir: dataset root (train and val user splits within).
        outDir: root output directory for checkpoints and logs.
        trainBatchSize / valBatchSize: loader batch sizes.
        lr1: learning rate for the fine-tuned head parameters.
        numEpochs: number of training epochs.
        decay_step / decay_factor: StepLR step size and gamma.
    """
    num_classes = 61  # gtea61 dataset
    model_folder = os.path.join("./", outDir, version)
    # Refuse to overwrite an existing run directory.
    print(f"Checking directory {model_folder}")
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    print(f"Creating directory{model_folder}")
    os.makedirs(model_folder)

    # Log files (closed at the bottom of the function).
    print(f"Creating log files")
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # ImageNet mean and std
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    # Train/val partitioning by user id.
    train_usr = ["S1", "S3", "S4"]
    val_usr = ["S2"]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    # Train dataset: augmented transform above.
    print(f"Defining train dataset")
    vid_seq_train = makeDataset(trainDatasetDir,
                                train_usr,
                                spatial_transform,
                                stackSize=stackSize,
                                seqLen=seqLen)
    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)
    # Val dataset: deterministic center crop.
    print(f"Defining validation dataset")
    vid_seq_val = makeDataset(trainDatasetDir,
                              val_usr,
                              spatial_transform=Compose([
                                  Scale(256),
                                  CenterCrop(224),
                                  ToTensor(), normalize
                              ]),
                              stackSize=stackSize,
                              phase="val",
                              seqLen=seqLen)
    val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                             batch_size=valBatchSize,
                                             shuffle=False,
                                             num_workers=2,
                                             pin_memory=True)
    valSamples = vid_seq_val.__len__()

    # Joint model built from the two pretrained streams (see twoStreamModel.py).
    print("Building model")
    model = twoStreamAttentionModel(flowModel=flowModel,
                                    frameModel=rgbModel,
                                    stackSize=stackSize,
                                    memSize=memSize,
                                    num_classes=num_classes)
    print("Setting trainable parameters")
    # Initially freeze all layers, then selectively unfreeze below.
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)
    train_params = []
    # Classifier head (the layer that joins the two streams' outputs).
    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]
    # Frame-model ConvLSTM cell.
    for params in model.frameModel.lstm_cell.parameters():
        train_params += [params]
        params.requires_grad = True
    # Frame-model ResNet layer4 conv blocks, one sub-block at a time.
    for params in model.frameModel.resNet.layer4[0].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.frameModel.resNet.layer4[0].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.frameModel.resNet.layer4[1].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.frameModel.resNet.layer4[1].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.frameModel.resNet.layer4[2].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.frameModel.resNet.layer4[2].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    # Frame-model final fully connected layer. NOTE(review): in the joint
    # forward pass this layer is reportedly skipped — confirm it needs
    # gradients at all.
    for params in model.frameModel.resNet.fc.parameters():
        params.requires_grad = True
        train_params += [params]
    # Flow-model layer4 gets its own (smaller) learning rate below.
    base_params = []
    for params in model.flowModel.layer4.parameters():
        base_params += [params]
        params.requires_grad = True

    print("Moving model to GPU")
    model.to(DEVICE)
    trainSamples = vid_seq_train.__len__()
    min_accuracy = 0

    print("Defining loss function, optimizer and scheduler")
    loss_fn = nn.CrossEntropyLoss()
    # Two parameter groups: head params at lr1, flow layer4 at a fixed 1e-4.
    optimizer_fn = torch.optim.SGD([
        {'params': train_params},
        {'params': base_params, 'lr': 1e-4},
    ],
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)
    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn,
                                                      step_size=decay_step,
                                                      gamma=decay_factor)
    train_iter = 0
    print("Training begun")
    # TRAIN PROCEDURE
    for epoch in range(numEpochs):
        # NOTE(review): scheduler stepped BEFORE any optimizer.step() — this
        # is the pre-1.1.0 PyTorch ordering; on modern PyTorch it decays the
        # LR one epoch early and emits a warning. Consider moving this call
        # to the end of the epoch.
        optim_scheduler.step()
        epoch_loss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        # Only the unfrozen sub-modules are put in train mode.
        model.classifier.train(True)
        model.flowModel.layer4.train(True)
        start = time.time()
        for j, (inputFrame, inputMMaps, inputFlow,
                targets) in enumerate(train_loader):
            print(f"step {j} / {int(np.floor(trainSamples/trainBatchSize))}")
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()  # put gradients to zero
            inputVariableFlow = Variable(inputFlow.to(DEVICE))
            # Reorder frames to (seq, batch, C, H, W).
            inputVariableFrame = Variable(
                inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE))
            labelVariable = Variable(targets.to(DEVICE))
            output_label = model(inputVariableFlow, inputVariableFrame)
            # NOTE(review): CrossEntropyLoss applied on log-softmax outputs —
            # this double-applies log-softmax; verify against the original
            # training recipe before changing.
            loss = loss_fn(F.log_softmax(output_label, dim=1), labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            # counting number of correct predictions
            numCorrTrain += (predicted == targets.to(DEVICE)).sum()
            epoch_loss += loss.data.item()
        avg_loss = epoch_loss / iterPerEpoch  # average per-epoch loss
        trainAccuracy = (numCorrTrain.item() / trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(
            epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(
            epoch + 1, trainAccuracy))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))
        print(f"Elapsed : {time.time()-start}")

        # VALIDATION every 5th epoch.
        if (epoch + 1) % 5 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            numCorr = 0
            for j, (inputFrame, inputMMaps, inputFlow,
                    targets) in enumerate(val_loader):
                if j % 1 == 0:
                    print(f"step {j} / {int(np.floor(vid_seq_val.__len__()/valBatchSize))}")
                val_iter += 1
                inputVariableFlow = Variable(inputFlow.to(DEVICE))
                inputVariableFrame = Variable(
                    inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE))
                labelVariable = Variable(targets.to(DEVICE))
                output_label = model(inputVariableFlow, inputVariableFrame)
                loss = loss_fn(F.log_softmax(output_label, dim=1),
                               labelVariable)
                val_loss_epoch += loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == labelVariable.data).sum()
            val_accuracy = (numCorr.item() / valSamples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_val_loss))
            print('Val Accuracy after {} epochs = {}%'.format(
                epoch + 1, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            # Keep the best-accuracy checkpoint so overfitting later does not
            # lose the best model; the epoch count matters less this way.
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder +
                                   '/model_twoStream_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize, MultiScaleCornerCrop, RandomHorizontalFlip) import matplotlib.pyplot as plt import importlib importlib.reload(grad_cam) mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) spatial_transform = Compose( [Scale(256), MultiScaleCornerCrop([1], 224), ToTensor(), normalize]) def frame_example(image): flowModel = "../experiments/gtea61/flow/model_flow_state_dict.pth" rgbModel = "modelsFolder/experiments/gtea61/rgb/stage2/model_rgb_state_dict.pth" stackSize = 5 memSize = 512 num_classes = 61 seqLen = 7 model_state_dict = "modelsFolder/selfSupervisedExperiments/gtea61/twoStream/model_twoStream_state_dict.pth" trainDatasetDir = "../GTEA61/flow_x_processed/train" model = twoStreamAttentionModel(flowModel=flowModel, frameModel=rgbModel, stackSize=stackSize, memSize=memSize,
def __init__(self):
    # (model name, explanation method, visualization style) pipelines to run.
    self.model_methods = [['resnext', 'gradcam', 'camshow']]
    # Action class labels (51 classes — matches the HMDB-51 label set).
    self.classes = [
        "brush_hair", "cartwheel", "catch", "chew", "clap", "climb",
        "climb_stairs", "dive", "draw_sword", "dribble", "drink", "eat",
        "fall_floor", "fencing", "flic_flac", "golf", "handstand", "hit",
        "hug", "jump", "kick", "kick_ball", "kiss", "laugh", "pick", "pour",
        "pullup", "punch", "push", "pushup", "ride_bike", "ride_horse",
        "run", "shake_hands", "shoot_ball", "shoot_bow", "shoot_gun", "sit",
        "situp", "smile", "smoke", "somersault", "stand", "swing_baseball",
        "sword", "sword_exercise", "talk", "throw", "turn", "walk", "wave"
    ]
    scales = [1.0]
    # Three variants of the 112x112 crop pipeline: model-input normalization,
    # raw crop only, and identity-normalized tensor.
    self.spatial_transform = Compose([
        MultiScaleCornerCrop(scales, 112),
        ToTensor(1.0),
        Normalize(get_mean(1.0, dataset='activitynet'), get_std(1.0))
    ])
    self.spatial_transform2 = Compose([MultiScaleCornerCrop(scales, 112)])
    self.spatial_transform3 = Compose([
        MultiScaleCornerCrop(scales, 112),
        ToTensor(1),
        Normalize([0, 0, 0], [1, 1, 1])
    ])
    self.model = utils.load_model(self.model_methods[0][0])
    self.model.cuda()
    self.bb_frames = []  # bounding-box data per frame
    # One Grad-CAM explainer per network stage, from the stem to the pool.
    method_name = 'gradcam'
    self.explainer = get_explainer(self.model, method_name, "conv1")
    self.explainer2 = get_explainer(self.model, method_name, "layer1")
    self.explainer3 = get_explainer(self.model, method_name, "layer2")
    self.explainer4 = get_explainer(self.model, method_name, "layer3")
    self.explainer5 = get_explainer(self.model, method_name, "layer4")
    self.explainer6 = get_explainer(self.model, method_name, "avgpool")
    # Input frame directory (trailing slash expected by downstream code).
    path = "images/frames4"
    self.path = path + "/"
    self.seq = []  # frame sequence buffer
    self.kls = []  # predicted class ids
    self.scr = []  # prediction scores
    # Hit counters per explainer stage, plus total frames processed.
    self.totalhit = 0
    self.totalhit2 = 0
    self.totalhit3 = 0
    self.totalhit4 = 0
    self.totalhit5 = 0
    self.totalhit6 = 0
    self.totalhit7 = 0
    self.totalframes = 0