def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(
        opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
        opt.file_type, opt.inference_subset, spatial_transform,
        temporal_transform)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
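# Hedged usage sketch (added for illustration; not part of the original source).
# `run_inference_example` is a hypothetical helper: it assumes `opt` carries the
# fields referenced above, that `model` is a trained network on the GPU, and that
# the clip tensor comes first in each batch produced by the returned collate_fn.
def run_inference_example(opt, model):
    inference_loader, class_names = get_inference_utils(opt)
    model.eval()
    with torch.no_grad():
        for batch in inference_loader:
            inputs = batch[0]  # assumed: clip tensor is the first element
            outputs = torch.nn.functional.softmax(model(inputs.cuda()), dim=1)
            top_scores, top_indices = outputs.topk(k=3, dim=1)
            # map class indices to human-readable labels for the first clip
            print([class_names[i] for i in top_indices[0].tolist()],
                  top_scores[0].tolist())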
testBatchSize = 1
trainX, trainY, testX, testY = make_split(data_path)

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize = Normalize(mean=mean, std=std)

spatial_transform = Compose([
    Scale(256),
    RandomHorizontalFlip(),
    MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
    ToTensor(), normalize
])
vidSeqTrain = makeDataset(trainX, trainY,
                          spatial_transform=spatial_transform,
                          seqLen=seqLen)
trainLoader = torch.utils.data.DataLoader(vidSeqTrain,
                                          batch_size=args.trainBatchSize,
                                          shuffle=True, num_workers=0)

test_spatial_transform = Compose([
    Scale(256),
    CenterCrop(224),
    FlippedImagesTest(mean=mean, std=std)
])
vidSeqTest = makeDataset(testX, testY, seqLen=seqLen,
                         spatial_transform=test_spatial_transform)
testLoader = torch.utils.data.DataLoader(vidSeqTest,
                                         batch_size=testBatchSize,
                                         shuffle=False, num_workers=1)

# trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
# trainLoader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
# testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
# testLoader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)
# classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
def main_run(numEpochs, lr, stepSize, decayRate, trainBatchSize, seqLen,
             memSize, evalInterval, evalMode, numWorkers, outDir,
             fightsDir_train, noFightsDir_train, fightsDir_test,
             noFightsDir_test):

    train_dataset_dir_fights = fightsDir_train
    train_dataset_dir_noFights = noFightsDir_train
    test_dataset_dir_fights = fightsDir_test
    test_dataset_dir_noFights = noFightsDir_test

    trainDataset, trainLabels, trainNumFrames = make_split(
        train_dataset_dir_fights, train_dataset_dir_noFights)
    testDataset, testLabels, testNumFrames = make_split(
        test_dataset_dir_fights, test_dataset_dir_noFights)

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vidSeqTrain = VideoDataset(trainDataset, trainLabels, trainNumFrames,
                               spatial_transform=spatial_transform,
                               seqLen=seqLen)
    trainLoader = torch.utils.data.DataLoader(vidSeqTrain,
                                              batch_size=trainBatchSize,
                                              shuffle=True,
                                              num_workers=numWorkers,
                                              pin_memory=True,
                                              drop_last=True)

    if evalMode == 'centerCrop':
        test_spatial_transform = Compose(
            [Scale(256), CenterCrop(224), ToTensor(), normalize])
        testBatchSize = 1
    elif evalMode == 'tenCrops':
        test_spatial_transform = Compose(
            [Scale(256), TenCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'fiveCrops':
        test_spatial_transform = Compose(
            [Scale(256), FiveCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'horFlip':
        test_spatial_transform = Compose([
            Scale(256),
            CenterCrop(224),
            FlippedImagesTest(mean=mean, std=std)
        ])
        testBatchSize = 1

    vidSeqTest = VideoDataset(testDataset, testLabels, testNumFrames,
                              seqLen=seqLen,
                              spatial_transform=test_spatial_transform)
    testLoader = torch.utils.data.DataLoader(vidSeqTest,
                                             batch_size=testBatchSize,
                                             shuffle=False,
                                             num_workers=int(numWorkers / 2),
                                             pin_memory=True)

    numTrainInstances = vidSeqTrain.__len__()
    numTestInstances = vidSeqTest.__len__()
    print('Number of training samples = {}'.format(numTrainInstances))
    print('Number of testing samples = {}'.format(numTestInstances))

    modelFolder = './experiments_' + outDir  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(modelFolder):
        print(modelFolder + ' exists!!!')
        sys.exit()
    else:
        os.makedirs(modelFolder)
    # Log files
    writer = SummaryWriter(modelFolder)
    trainLogLoss = open((modelFolder + '/trainLogLoss.txt'), 'w')
    trainLogAcc = open((modelFolder + '/trainLogAcc.txt'), 'w')
    testLogLoss = open((modelFolder + '/testLogLoss.txt'), 'w')
    testLogAcc = open((modelFolder + '/testLogAcc.txt'), 'w')

    model = ViolenceModel(mem_size=memSize)

    trainParams = []
    for params in model.parameters():
        params.requires_grad = True
        trainParams += [params]
    model.train(True)
    model.cuda()

    lossFn = nn.CrossEntropyLoss()
    optimizerFn = torch.optim.RMSprop(trainParams, lr=lr)
    optimScheduler = torch.optim.lr_scheduler.StepLR(optimizerFn, stepSize,
                                                     decayRate)

    minAccuracy = 50  # save a checkpoint only once test accuracy exceeds this threshold
    for epoch in range(numEpochs):
        epochLoss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.train(True)
        print('Epoch = {}'.format(epoch + 1))
        writer.add_scalar('lr', optimizerFn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(trainLoader):
            iterPerEpoch += 1
            optimizerFn.zero_grad()
            inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            outputLabel = model(inputVariable1)
            loss = lossFn(outputLabel, labelVariable)
            loss.backward()
            optimizerFn.step()
            outputProb = torch.nn.Softmax(dim=1)(outputLabel)
            _, predicted = torch.max(outputProb.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum().item()
            epochLoss += loss.item()
        # Step the LR scheduler once per epoch, after the optimizer updates
        # (required ordering since PyTorch 1.1).
        optimScheduler.step()
        avgLoss = epochLoss / iterPerEpoch
        trainAccuracy = (numCorrTrain / numTrainInstances) * 100
        print('Training: Loss = {} | Accuracy = {}% '.format(
            avgLoss, trainAccuracy))
        writer.add_scalar('train/epochLoss', avgLoss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        trainLogLoss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avgLoss))
        trainLogAcc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))
        if (epoch + 1) % evalInterval == 0:
            model.train(False)
            print('Evaluating...')
            testLossEpoch = 0
            testIter = 0
            numCorrTest = 0
            with torch.no_grad():  # replaces the deprecated volatile=True flag
                for j, (inputs, targets) in enumerate(testLoader):
                    testIter += 1
                    if evalMode == 'centerCrop':
                        inputVariable1 = inputs.permute(1, 0, 2, 3, 4).cuda()
                    else:
                        inputVariable1 = inputs[0].cuda()
                    labelVariable = targets.cuda(non_blocking=True)
                    outputLabel = model(inputVariable1)
                    outputLabel_mean = torch.mean(outputLabel, 0, True)
                    testLoss = lossFn(outputLabel_mean, labelVariable)
                    testLossEpoch += testLoss.item()
                    _, predicted = torch.max(outputLabel_mean.data, 1)
                    numCorrTest += (predicted == targets[0]).sum().item()
            testAccuracy = (numCorrTest / numTestInstances) * 100
            avgTestLoss = testLossEpoch / testIter
            print('Testing: Loss = {} | Accuracy = {}% '.format(
                avgTestLoss, testAccuracy))
            writer.add_scalar('test/epochloss', avgTestLoss, epoch + 1)
            writer.add_scalar('test/accuracy', testAccuracy, epoch + 1)
            testLogLoss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avgTestLoss))
            testLogAcc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, testAccuracy))
            if testAccuracy > minAccuracy:
                savePathClassifier = (modelFolder + '/bestModel.pth')
                torch.save(model, savePathClassifier)
                minAccuracy = testAccuracy
    trainLogAcc.close()
    testLogAcc.close()
    trainLogLoss.close()
    testLogLoss.close()
    writer.export_scalars_to_json(modelFolder + "/all_scalars.json")
    writer.close()
    return True
def main_run(dataset, stage, trainDatasetDir, valDatasetDir, stage1_dict, stackSize, out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize, alphaX, alphaY): if dataset == 'gtea61': num_classes = 61 elif dataset == 'gtea71': num_classes = 71 elif dataset == 'gtea_gaze': num_classes = 44 elif dataset == 'egtea': num_classes = 106 else: print('Dataset not found') sys.exit() model_folder = os.path.join( './', out_dir, 'attConvLSTM', str(seqLen), 'stage' + str(stage)) # Dir for saving models and log files # Create the dir if os.path.exists(model_folder): print('Directory {} exists!'.format(model_folder)) sys.exit() os.makedirs(model_folder) # Log files writer = SummaryWriter(model_folder) train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') # Data loader normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([ Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224) ]) spatial_transform2 = Compose([Scale((7, 7)), ToTensor()]) vid_seq_train = makeDataset(trainDatasetDir, spatial_transform2, spatial_transform=spatial_transform, sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', seqLen=seqLen) trainInstances = vid_seq_train.__len__() train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, num_workers=4, pin_memory=True) if valDatasetDir is not None: vid_seq_val = makeDataset(valDatasetDir, spatial_transform2, spatial_transform=Compose( [Scale(256), CenterCrop(224)]), sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', phase='Test', seqLen=seqLen) valInstances = vid_seq_val.__len__() val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True) train_params = [] if stage == 1: model = attentionModel(num_classes=num_classes, mem_size=memSize) model.train(False) for params in model.parameters(): params.requires_grad = False else: # stage == 2 model = attentionModel(num_classes=num_classes, mem_size=memSize) model.load_state_dict(torch.load(stage1_dict), strict=False) model.train(False) for params in model.parameters(): params.requires_grad = False # for params in model.resNet.layer4[0].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[0].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[2].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[2].conv2.parameters(): params.requires_grad = True train_params += [params] # for params in model.resNet.fc.parameters(): params.requires_grad = True train_params += [params] model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) for params in model.lstm_cell.parameters(): params.requires_grad = True train_params += [params] 
for params in model.classifier.parameters(): params.requires_grad = True train_params += [params] model.lstm_cell.train(True) model.classifier.train(True) model.cuda() loss_fn = nn.CrossEntropyLoss() loss_fn_regression = nn.MSELoss() # Loss function for the regression model optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4) optim_scheduler = torch.optim.lr_scheduler.MultiStepLR( optimizer_fn, milestones=decay_step, gamma=decay_factor) train_iter = 0 min_accuracy = 0 for epoch in range(numEpochs): epoch_loss = 0 numCorrTrain = 0 x_loss = 0 y_loss = 0 trainSamples = 0 iterPerEpoch = 0 model.lstm_cell.train(True) model.classifier.train(True) writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1) if stage == 2: model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) #for i, (inputs, targets) in enumerate(train_loader): for flowX, flowY, inputs, targets in train_loader: train_iter += 1 iterPerEpoch += 1 optimizer_fn.zero_grad() flowX = flowX.cuda() flowY = flowY.cuda() inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda()) labelVariable = Variable(targets.cuda()) trainSamples += inputs.size(0) output_label, _, flowXprediction, flowYprediction = model( inputVariable) #Reshaping predictions and inputs in order #to correctly regress on the inputs flowXprediction = flowXprediction.view(-1) flowX = torch.reshape(flowX, (-1, )).float() flowYprediction = flowYprediction.view(-1) flowY = torch.reshape(flowY, (-1, )).float() #print(f'Prediction: {flowXprediction.size()}') #print(f'Input : {flowX.size()}') #sys.exit() lossX = alphaX * loss_fn_regression(flowXprediction, flowX) lossY = alphaY * loss_fn_regression(flowYprediction, flowY) loss = loss_fn(output_label, labelVariable) #Weighting the loss of the ss task #by multiplying it by alpha total_loss = loss + lossX + lossY total_loss.backward() optimizer_fn.step() _, predicted = torch.max(output_label.data, 1) numCorrTrain += (predicted == targets.cuda()).sum() x_loss += lossX.item() y_loss += lossY.item() epoch_loss += loss.item() optim_scheduler.step() avg_x_loss = x_loss / iterPerEpoch avg_y_loss = y_loss / iterPerEpoch avg_loss = epoch_loss / iterPerEpoch trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100 print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format( epoch + 1, avg_loss, trainAccuracy)) print('X loss after {} epoch = {}% '.format(epoch + 1, avg_x_loss)) print('Y loss after {} epoch = {}% '.format(epoch + 1, avg_y_loss)) writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1) writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1) writer.add_scalar('x_train_loss', avg_x_loss, epoch + 1) writer.add_scalar('y_train_loss', avg_y_loss, epoch + 1) train_log_loss.write('Training X loss after {} epoch= {}'.format( epoch + 1, avg_x_loss)) train_log_loss.write('Training Y loss after {} epoch= {}'.format( epoch + 1, avg_y_loss)) train_log_loss.write('Training loss after {} epoch = {}\n'.format( epoch + 1, avg_loss)) train_log_acc.write('Training accuracy after {} epoch = {}\n'.format( epoch + 1, trainAccuracy)) if valDatasetDir is not None: model.train(False) val_loss_epoch = 0 val_iter = 0 val_x_loss = 0 val_y_loss = 0 val_samples = 0 numCorr = 0 mmap_loss = 0 with torch.no_grad(): #for j, (inputs, targets) in enumerate(val_loader): for flowX, 
flowY, inputs, targets in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)
                    flowX = flowX.cuda()
                    flowY = flowY.cuda()
                    inputVariable = Variable(
                        inputs.permute(1, 0, 2, 3, 4).cuda())
                    labelVariable = Variable(targets.cuda(non_blocking=True))
                    output_label, _, flowXprediction, flowYprediction = model(
                        inputVariable)
                    # Reshaping predictions and inputs in order
                    # to correctly regress on the inputs
                    flowXprediction = flowXprediction.view(-1)
                    flowX = torch.reshape(flowX, (-1, )).float()
                    flowYprediction = flowYprediction.view(-1)
                    flowY = torch.reshape(flowY, (-1, )).float()
                    lossX = alphaX * loss_fn_regression(flowXprediction, flowX)
                    lossY = alphaY * loss_fn_regression(flowYprediction, flowY)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    val_x_loss += lossX.item()
                    val_y_loss += lossY.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum()
            avg_x_val_loss = val_x_loss / val_iter
            avg_y_val_loss = val_y_loss / val_iter
            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val X Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_x_val_loss))
            print('Val Y Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_y_val_loss))
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val x/epoch_loss', avg_x_val_loss, epoch + 1)
            writer.add_scalar('val y/epoch_loss', avg_y_val_loss, epoch + 1)
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val X Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_x_val_loss))
            val_log_loss.write('Val Y Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_y_val_loss))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize, regressor): if dataset == 'gtea61': num_classes = 61 elif dataset == 'gtea71': num_classes = 71 elif dataset == 'gtea_gaze': num_classes = 44 elif dataset == 'egtea': num_classes = 106 else: print('Dataset not found') sys.exit() model_folder = os.path.join('./', out_dir, dataset, 'MS',str(stage)) # Dir for saving models and log files # Create the dir if os.path.exists(model_folder): print('Directory {} exists!'.format(model_folder)) sys.exit() os.makedirs(model_folder) # Log files writer = SummaryWriter(model_folder) train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') train_log_loss_ms= open((model_folder + '/train_log_loss_ms.txt'), 'w') val_log_loss_ms = open((model_folder + '/val_log_loss_ms.txt'), 'w') train_log_acc_ms= open((model_folder + '/train_log_acc_ms.txt'), 'w') val_log_acc_ms = open((model_folder + '/val_log_acc_ms.txt'), 'w') # Data loader normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)]) vid_seq_train = makeDataset(train_data_dir, spatial_transform=spatial_transform, seqLen=seqLen, fmt='.png',phase='train', regressor=regressor) train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, num_workers=4, pin_memory=True) if val_data_dir is not None: vid_seq_val = makeDataset(val_data_dir, spatial_transform=Compose([Scale(256), CenterCrop(224)]), seqLen=seqLen, fmt='.png',phase='test', regressor=regressor) val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True) valInstances = vid_seq_val.__len__() trainInstances = vid_seq_train.__len__() train_params = [] if stage == 1: model = attentionModel_ml(num_classes=num_classes, mem_size=memSize, regressor=regressor) model.train(False) for params in model.parameters(): params.requires_grad = False else: model = attentionModel_ml(num_classes=num_classes, mem_size=memSize, regressor=regressor) model.load_state_dict(torch.load(stage1_dict),strict=False) model.train(False) for params in model.parameters(): params.requires_grad = False # for params in model.resNet.layer4[0].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[0].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[2].conv1.parameters(): params.requires_grad = True train_params += [params] # for params in model.resNet.layer4[2].conv2.parameters(): params.requires_grad = True train_params += [params] # for params in model.resNet.fc.parameters(): params.requires_grad = True train_params += [params] for params in model.conv.parameters(): params.requires_grad = True train_params += [params] for params in model.clas.parameters(): params.requires_grad = True train_params += [params] model.conv.train(True) model.clas.train(True) 
model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) for params in model.lstm_cell.parameters(): params.requires_grad = True train_params += [params] for params in model.classifier.parameters(): params.requires_grad = True train_params += [params] model.lstm_cell.train(True) model.classifier.train(True) model.cuda() loss_fn = nn.CrossEntropyLoss() loss_fms = nn.NLLLoss() loss_reg = nn.MSELoss() optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4) optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step, gamma=decay_factor) train_iter = 0 min_accuracy = 0 for epoch in range(numEpochs): epoch_loss = 0 numCorrTrain = 0 numCorrTrain_ms = 0 trainSamples = 0 iterPerEpoch = 0 epoch_loss_ms = 0 model.lstm_cell.train(True) model.classifier.train(True) writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1) if stage == 2: model.conv.train(True) model.clas.train(True) model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) for i, (inputs ,binary_map, targets) in enumerate(train_loader): train_iter += 1 iterPerEpoch += 1 optimizer_fn.zero_grad() inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda()) labelVariable = Variable(targets.cuda()) trainSamples += inputs.size(0) output_label, output_ms = model(inputVariable) loss = loss_fn(output_label, labelVariable) if stage==2 : loss.backward(retain_graph=True) else: loss.backward() if regressor == 0: binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).type(torch.LongTensor).cuda()) output_ms = output_ms.view(-1,2) elif regressor == 1: binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).cuda()) output_ms = output_ms.view(-1) binary_map =binary_map.contiguous().view(-1) if stage==2: if regressor == 1: loss_ms=loss_reg(output_ms, binary_map) loss_ms.backward() epoch_loss_ms+=loss_ms.item() elif regressor == 0: loss_ms=loss_fn(output_ms, binary_map) loss_ms.backward() _, predicted = torch.max(output_ms.data, 1) numCorrTrain_ms += torch.sum(predicted == binary_map.data).data.item() epoch_loss_ms+=loss_ms.item() optimizer_fn.step() _, predicted = torch.max(output_label.data, 1) numCorrTrain += torch.sum(predicted == labelVariable.data).data.item() epoch_loss += loss.item() avg_loss = epoch_loss/iterPerEpoch if stage ==2: trainAccuracy = (numCorrTrain_ms / trainSamples) * 100 avg_loss_ms= epoch_loss_ms/iterPerEpoch #avg_loss = avg_loss + avg_loss_ms train_log_loss_ms.write('Train Loss MS after {} epochs = {}\n'.format(epoch + 1, avg_loss_ms)) if regressor == 0:train_log_acc_ms.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy)) trainAccuracy = (numCorrTrain / trainSamples) * 100 print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch+1, avg_loss, trainAccuracy)) writer.add_scalar('train/epoch_loss', avg_loss, epoch+1) writer.add_scalar('train/accuracy', trainAccuracy, epoch+1) train_log_loss.write('Train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss)) train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy)) if val_data_dir is not None: if (epoch+1) % 1 == 
0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            numCorr_ms = 0
            epoch_loss_ms_val = 0
            with torch.no_grad():  # replaces the deprecated volatile=True flag
                for j, (inputs, binary_map, targets) in enumerate(val_loader):
                    val_iter += 1
                    val_samples += inputs.size(0)
                    inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
                    labelVariable = targets.cuda(non_blocking=True)
                    output_label, output_ms = model(inputVariable)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    if regressor == 0:
                        binary_map = binary_map.permute(
                            1, 0, 2, 3, 4).type(torch.LongTensor).cuda()
                        output_ms = output_ms.view(-1, 2)
                    elif regressor == 1:
                        binary_map = binary_map.permute(1, 0, 2, 3, 4).cuda()
                        output_ms = output_ms.view(-1)
                    binary_map = binary_map.contiguous().view(-1)
                    if stage == 2:
                        if regressor == 1:
                            loss_ms = loss_reg(output_ms, binary_map)
                            epoch_loss_ms_val += loss_ms.item()
                        elif regressor == 0:
                            loss_ms = loss_fn(output_ms, binary_map)
                            _, predicted = torch.max(output_ms.data, 1)
                            numCorr_ms += torch.sum(
                                predicted == binary_map.data).data.item()
                            epoch_loss_ms_val += loss_ms.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += torch.sum(
                        predicted == labelVariable.data).data.item()
            avg_val_loss = val_loss_epoch / val_iter
            if stage == 2:
                avg_loss_ms = epoch_loss_ms_val / val_iter
                val_accuracy = (numCorr_ms / val_samples) * 100
                # avg_loss = avg_loss + avg_loss_ms
                val_log_loss_ms.write('Val Loss MS after {} epochs = {}\n'.format(
                    epoch + 1, avg_loss_ms))
                if regressor == 0:
                    val_log_acc_ms.write(
                        'Val Accuracy after {} epochs = {}%\n'.format(
                            epoch + 1, val_accuracy))
            val_accuracy = (numCorr / val_samples) * 100
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_ms_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
        else:
            if (epoch + 1) % 10 == 0:
                save_path_model = (model_folder + '/model_ms_state_dict_epoch' +
                                   str(epoch + 1) + '.pth')
                torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    train_log_loss_ms.close()
    val_log_loss_ms.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
    optim_scheduler.step()
train_logger = Logger(os.path.join(cfg.custom_logdir, 'train.log'),
                      ['epoch', 'loss', 'acc', 'lr'])
train_batch_logger = Logger(
    os.path.join(cfg.custom_logdir, 'train_batch.log'),
    ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

optimizer = model.get_optimizer(lr1=cfg.lr, lr2=cfg.lr2)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                           patience=cfg.lr_patience)

print('##########################################')
print('####### val')
print('##########################################')
spatial_transform = Compose([
    Scale(cfg.sample_size),
    CenterCrop(cfg.sample_size),
    ToTensor(cfg.norm_value), norm_method
])
temporal_transform = LoopPadding(cfg.sample_duration)
target_transform = ClassLabel()
validation_data = get_validation_set(cfg, spatial_transform,
                                     temporal_transform, target_transform)
val_loader = torch.utils.data.DataLoader(validation_data,
                                         batch_size=cfg.batch_size,
                                         shuffle=False,
                                         num_workers=cfg.n_threads,
                                         drop_last=False,
                                         pin_memory=True)
val_logger = Logger(os.path.join(cfg.custom_logdir, 'val.log'),
                    ['epoch', 'loss', 'acc'])
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize, regression, rloss, debug, verbose, CAM): # GTEA 61 num_classes = 61 # Train/Validation/Test split train_splits = ["S1", "S3", "S4"] val_splits = ["S2"] if debug: n_workers = 0 device = 'cpu' else: n_workers = 4 device = 'cuda' model_folder = os.path.join( './', out_dir, dataset, 'rgb', 'stage' + str(stage)) # Dir for saving models and log files # Create the dir if os.path.exists(model_folder): print('Directory {} exists!'.format(model_folder)) sys.exit() os.makedirs(model_folder) # Log files train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') # Data loader normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([ Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224), # ToTensor(), # normalize ]) transform_rgb = Compose([ToTensor(), normalize]) transform_MS = Compose([Resize((7, 7)), ToTensor()]) vid_seq_train = makeDataset(train_data_dir, splits=train_splits, spatial_transform=spatial_transform, transform_rgb=transform_rgb, transform_MS=transform_MS, seqLen=seqLen, fmt='.png', regression=regression) train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, num_workers=n_workers, pin_memory=True) vid_seq_val = makeDataset(train_data_dir, splits=val_splits, spatial_transform=Compose( [Scale(256), CenterCrop(224)]), transform_rgb=transform_rgb, transform_MS=transform_MS, seqLen=seqLen, fmt='.png', regression=regression, verbose=False) val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=n_workers, pin_memory=True) valInstances = vid_seq_val.__len__() ''' if val_data_dir is not None: vid_seq_val = makeDataset(val_data_dir, spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]), seqLen=seqLen, fmt='.jpg') val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True) valInstances = vid_seq_val.__len__() ''' trainInstances = vid_seq_train.__len__() train_params = [] if stage == 1: if regression: model = SelfSupervisedAttentionModel(num_classes=num_classes, mem_size=memSize, n_channels=1) else: model = SelfSupervisedAttentionModel(num_classes=num_classes, mem_size=memSize) model.train(False) for params in model.parameters(): params.requires_grad = False else: if regression: model = SelfSupervisedAttentionModel(num_classes=num_classes, mem_size=memSize, n_channels=1) else: model = SelfSupervisedAttentionModel(num_classes=num_classes, mem_size=memSize) model.load_state_dict(torch.load(stage1_dict), strict=False) model.train(False) for params in model.parameters(): params.requires_grad = False # for params in model.resNet.layer4[0].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[0].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv2.parameters(): params.requires_grad = True train_params += [params] for params in 
model.resNet.layer4[2].conv1.parameters(): params.requires_grad = True train_params += [params] # for params in model.resNet.layer4[2].conv2.parameters(): params.requires_grad = True train_params += [params] # for params in model.resNet.fc.parameters(): params.requires_grad = True train_params += [params] model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) # Add params from ms_module for params in model.ms_module.parameters(): params.requires_grad = True train_params += [params] for params in model.lstm_cell.parameters(): params.requires_grad = True train_params += [params] for params in model.classifier.parameters(): params.requires_grad = True train_params += [params] model.lstm_cell.train(True) model.classifier.train(True) model.ms_module.train(True) model.to(device) # wandb.init(project="first_person_action_recognition") loss_fn = nn.CrossEntropyLoss() if regression: if rloss == 'MSE': # Mean Squared Error loss loss_ms_fn = nn.MSELoss() # it should work elif rloss == 'L1': # L1 loss loss_ms_fn = nn.L1Loss() elif rloss == 'SmoothL1': # Huber Loss or Smooth L1 Loss loss_ms_fn = nn.SmoothL1Loss() elif rloss == 'KLdiv': # Kullback-Leiber Loss loss_ms_fn = nn.KLDivLoss() else: # classification loss_ms_fn = nn.CrossEntropyLoss() # TODO: check paper Planamente optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4) optim_scheduler = torch.optim.lr_scheduler.MultiStepLR( optimizer_fn, milestones=decay_step, gamma=decay_factor) train_iter = 0 min_accuracy = 0 for epoch in range(numEpochs): epoch_loss = 0 numCorrTrain = 0 trainSamples = 0 iterPerEpoch = 0 #model.train(True) model.lstm_cell.train(True) model.classifier.train(True) if stage == 2: model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) model.ms_module.train(True) for i, (inputsRGB, inputsMS, targets) in enumerate(train_loader): # Inputs: # - inputsRGB : the rgb frame input # Labels : # - inputsMS : the motion task label # - targets : output train_iter += 1 iterPerEpoch += 1 optimizer_fn.zero_grad() inputVariable = inputsRGB.permute(1, 0, 2, 3, 4).to(device) labelVariable = targets.to(device) msVariable = inputsMS.to(device) trainSamples += inputsRGB.size(0) output_label, _, output_ms = model(inputVariable, device) loss_c = loss_fn(output_label, labelVariable) if regression: msVariable = torch.reshape( msVariable, (seqLen * 7 * 7, msVariable.size(0))) output_ms = torch.sigmoid(output_ms) output_ms = torch.reshape(output_ms, (seqLen * 7 * 7, output_ms.size(0))) else: # classification task msVariable = torch.reshape( msVariable, (seqLen * 7 * 7, msVariable.size(0))).long() output_ms = torch.reshape( output_ms, (seqLen * 7 * 7, 2, output_ms.size(0))) # loss_ms = loss_ms_fn(output_ms, msVariable) loss = loss_c + loss_ms if verbose: print(loss_c) print(loss_ms) print(loss) print() # loss = loss_fn(output_label, labelVariable) + loss_ms_fn(output_ms, inputsMS) # TODO (forse): invertire 0 e 1 dim per inputsMS # output1 = F.softmax(torch.reshape(output_ms, (32, 7, 2, 7*7))[0, 0, :, :], dim=0) loss.backward() optimizer_fn.step() _, predicted = torch.max(output_label.data, 1) 
numCorrTrain += (predicted == targets.to(device)).sum() epoch_loss += loss.data.item() avg_loss = epoch_loss / iterPerEpoch trainAccuracy = (numCorrTrain.data.item() / trainSamples) * 100 train_log_loss.write('Training loss after {} epoch = {}\n'.format( epoch + 1, avg_loss)) train_log_acc.write('Training accuracy after {} epoch = {}\n'.format( epoch + 1, trainAccuracy)) print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format( epoch + 1, avg_loss, trainAccuracy)) # VALIDATION PHASE #if val_data_dir is not None: if (epoch + 1) % 1 == 0: model.train(False) val_loss_epoch = 0 val_iter = 0 val_samples = 0 numCorr = 0 for j, (inputsRGB, inputsMS, targets) in enumerate(val_loader): val_iter += 1 val_samples += inputsRGB.size(0) inputVariable = inputsRGB.permute(1, 0, 2, 3, 4).to( device) # la permutazione è a solo scopo di computazione labelVariable = targets.to(device) msVariable = inputsMS.to(device) output_label, _, output_ms = model(inputVariable, device) loss_c = loss_fn(output_label, labelVariable) if regression: msVariable = torch.reshape( msVariable, (seqLen * 7 * 7, msVariable.size(0))) output_ms = torch.sigmoid(output_ms) output_ms = torch.reshape( output_ms, (seqLen * 7 * 7, output_ms.size(0))) else: # classification task msVariable = torch.reshape( msVariable, (seqLen * 7 * 7, msVariable.size(0))).long() output_ms = torch.reshape( output_ms, (seqLen * 7 * 7, 2, output_ms.size(0))) loss_ms = loss_ms_fn(output_ms, msVariable) val_loss = loss_c + loss_ms # val_loss = loss_fn(output_label, labelVariable) # TODO: add ms Loss val_loss_epoch += val_loss.data.item() _, predicted = torch.max(output_label.data, 1) numCorr += (predicted == targets.to(device)).sum() val_accuracy = (numCorr.data.item() / val_samples) * 100 avg_val_loss = val_loss_epoch / val_iter print('Valid: Epoch = {} | Loss {} | Accuracy = {}'.format( epoch + 1, avg_val_loss, val_accuracy)) val_log_loss.write('Val Loss after {} epochs = {}\n'.format( epoch + 1, avg_val_loss)) val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format( epoch + 1, val_accuracy)) if val_accuracy > min_accuracy: save_path_model = (model_folder + '/model_rgb_state_dict.pth') torch.save(model.state_dict(), save_path_model) min_accuracy = val_accuracy '''else: if (epoch+1) % 10 == 0: save_path_model = (model_folder + '/model_rgb_state_dict_epoch' + str(epoch+1) + '.pth') torch.save(model.state_dict(), save_path_model) ''' optim_scheduler.step() train_log_loss.close() train_log_acc.close() val_log_acc.close() val_log_loss.close()
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen,
             memSize):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)

    testBatchSize = 1
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=False,
                               numSeg=1,
                               stackSize=stackSize,
                               fmt='.jpg',
                               phase='Test',
                               seqLen=seqLen)

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    model = twoStreamAttentionModel(stackSize=5, memSize=512,
                                    num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0

    predicted_labels = []
    true_labels = []
    with torch.no_grad():  # replaces the deprecated volatile=True flag
        for j, (inputFlow, inputFrame, targets) in enumerate(test_loader):
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).cuda()
            inputVariableFlow = inputFlow.cuda()
            output_label = model(inputVariableFlow, inputVariableFrame)
            _, predictedTwoStream = torch.max(output_label.data, 1)
            numCorrTwoStream += (predictedTwoStream == targets.cuda()).sum().item()
            # store plain Python ints so sklearn's confusion_matrix accepts them
            predicted_labels.append(predictedTwoStream.item())
            true_labels.append(targets.item())
    test_accuracyTwoStream = (numCorrTwoStream / float(test_samples)) * 100
    print('Accuracy {:.02f}%'.format(test_accuracyTwoStream))

    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.linspace(0, 60, num=61)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-twoStreamJoint.jpg', bbox_inches='tight')
    plt.show()
def main(): global args global best_prec1 args = parser.parse_args() print('Training arguments:') for k, v in vars(args).items(): print('\t{}: {}'.format(k, v)) if args.data_name == 'ucf101': num_class = 101 elif args.data_name == 'hmdb51': num_class = 51 elif args.data_name == 'mine': num_class = 2 else: raise ValueError('Unknown dataset ' + args.data_name) model = Model(num_class, args.num_segments, args.representation, base_model=args.arch) print(model) if 'resnet3D' in args.arch: train_crop_min_ratio = 0.75 train_crop_min_scale = 0.25 mean = [0.4345, 0.4051, 0.3775] std = [0.2768, 0.2713, 0.2737] value_scale = 1 train_transform = Compose([ RandomResizedCrop( model.crop_size, (train_crop_min_scale, 1.0), (train_crop_min_ratio, 1.0 / train_crop_min_ratio)), RandomHorizontalFlip(), ToTensor(), ScaleValue(value_scale), Normalize(mean, std) ]) test_trainsform = Compose([ Resize(model.crop_size), CenterCrop(model.crop_size), ToTensor(), # range [0, 255] -> [0.0,1.0] ScaleValue(1), Normalize(mean, std) ]) train_loader = torch.utils.data.DataLoader( CoviarDataSet( args.data_root, args.data_name, video_list=args.train_list, num_segments=args.num_segments, representation=args.representation, transform=model.get_augmentation(), #train_transform, is_train=True, accumulate=(not args.no_accumulation), model_name=args.arch), batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True, worker_init_fn=worker_init_fn) val_loader = torch.utils.data.DataLoader( CoviarDataSet( args.data_root, args.data_name, video_list=args.test_list, num_segments=args.num_segments, representation=args.representation, transform=torchvision.transforms.Compose([ GroupScale(int(model.scale_size)), GroupCenterCrop(model.crop_size) ]), #test_trainsform, is_train=True, accumulate=(not args.no_accumulation), model_name=args.arch), batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True, worker_init_fn=worker_init_fn) model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda() cudnn.benchmark = True params_dict = dict(model.named_parameters()) params = [] for key, value in params_dict.items(): decay_mult = 0.0 if 'bias' in key else 1.0 if ('module.base_model.conv1' in key or 'module.base_model.bn1' in key or 'data_bn' in key) and args.representation in ['mv', 'residual']: lr_mult = 0.1 elif '.fc.' 
in key:
            lr_mult = 1.0
        else:
            lr_mult = 0.01

        params += [{
            'params': value,
            'lr': args.lr,
            'lr_mult': lr_mult,
            'decay_mult': decay_mult
        }]

    # optimizer = torch.optim.SGD(params, weight_decay=0.001, momentum=0.9, nesterov=False)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10)
    optimizer = torch.optim.Adam(params,
                                 weight_decay=args.weight_decay,
                                 eps=0.001)
    criterion = torch.nn.CrossEntropyLoss().cuda()

    for epoch in range(args.epochs):
        cur_lr = adjust_learning_rate(optimizer, epoch, args.lr_steps,
                                      args.lr_decay)
        # cur_lr = get_lr(optimizer)
        train(train_loader, model, criterion, optimizer, epoch, cur_lr)
        # prec1, prev_val_loss = validate(val_loader, model, criterion)
        # scheduler.step(prev_val_loss)
        if epoch % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, _ = validate(val_loader, model, criterion)
            # record the training history
            np.savez("train_history/train_history.npz",
                     loss=np.array(train_loss),
                     top1=np.array(train_prec),
                     lr=np.array(train_lr))
            np.savez("train_history/valid_history.npz",
                     loss=np.array(valid_loss),
                     top1=np.array(valid_prec))
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            if is_best or epoch % SAVE_FREQ == 0:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    },
                    is_best,
                    filename='checkpoint.pth.tar')
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    video_outputs = []
    video_segments = []
    with torch.no_grad():  # replaces the deprecated volatile=True flag
        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)
            outputs = F.softmax(outputs, dim=1)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)

    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)

    results = {'video': video_name, 'clips': []}
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }
        label = get_video_results(video_outputs[i], class_names, 5)
        clip_results['label'] = label
        results['clips'].append(clip_results)

    # _, max_indices = video_outputs.max(dim=1)
    # for i in range(video_outputs.size(0)):
    #     clip_results = {
    #         'segment': video_segments[i].tolist(),
    #     }
    #     if opt.mode == 'score':
    #         clip_results['label'] = class_names[max_indices[i]]
    #         clip_results['scores'] = video_outputs[i, max_indices[i]].item()
    #     elif opt.mode == 'feature':
    #         clip_results['features'] = video_outputs[i].tolist()
    #     results['clips'].append(clip_results)

    # average_scores = torch.mean(video_outputs, dim=0)
    # video_results, predicted_labels = get_video_results(average_scores, class_names, 1)
    # video_results = get_video_results(average_scores, class_names, 5)
    # results = {
    #     'video': video_name,
    #     'result': video_results,
    #     # 'predicted_labels': predicted_labels
    # }

    return results
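# Hedged usage sketch (added; not from the original source). `classify_directory_example`
# is a hypothetical helper: `video_root`, `class_names`, `model`, and `opt` are assumed
# to be prepared by the caller, and each subdirectory of `video_root` is assumed to hold
# the extracted frames of one video.
import json
import os

def classify_directory_example(video_root, class_names, model, opt,
                               out_path='results.json'):
    all_results = []
    for video_name in sorted(os.listdir(video_root)):
        video_dir = os.path.join(video_root, video_name)
        if not os.path.isdir(video_dir):
            continue
        all_results.append(
            classify_video(video_dir, video_name, class_names, model, opt))
    # dump all per-clip predictions to a single JSON file
    with open(out_path, 'w') as f:
        json.dump(all_results, f, indent=2)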
def main_run(version, stage, train_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor, decay_step, mem_size): num_classes = 61 model_folder = os.path.join("./", out_dir, version) if os.path.exists(model_folder): print('Directory {} exists!'.format(model_folder)) sys.exit() os.makedirs(model_folder) train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') # Train val partitioning train_usr = ["S1", "S3", "S4"] val_usr = ["S2"] # Data loader normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose( [Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224), ToTensor(), normalize]) vid_seq_train = makeDataset(train_data_dir, train_usr, spatial_transform=spatial_transform, seqLen=seqLen) train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, num_workers=4, pin_memory=True) vid_seq_val = makeDataset(train_data_dir, val_usr, spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]), seqLen=seqLen) val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True) train_params = [] # stage 1: train only lstm if stage == 1: model = attentionModel(num_classes=num_classes, mem_size=mem_size) model.train(False) for params in model.parameters(): params.requires_grad = False # stage 2: train lstm, layer4, spatial attention and final fc else: model = attentionModel(num_classes=num_classes, mem_size=mem_size) model.load_state_dict(torch.load(stage1_dict)) # pretrained model.train(False) for params in model.parameters(): params.requires_grad = False # for params in model.resNet.layer4[0].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[0].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[2].conv1.parameters(): params.requires_grad = True train_params += [params] # for params in model.resNet.layer4[2].conv2.parameters(): params.requires_grad = True train_params += [params] # for params in model.resNet.fc.parameters(): # fully connected layer params.requires_grad = True train_params += [params] model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) for params in model.lstm_cell.parameters(): # for both stages we train the lstm params.requires_grad = True train_params += [params] for params in model.classifier.parameters(): # for both stages we train the last classifier (after the lstm and avg pooling) params.requires_grad = True train_params += [params] model.lstm_cell.train(True) model.classifier.train(True) model.cuda() loss_fn = nn.CrossEntropyLoss() optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4) optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, 
milestones=decay_step, gamma=decay_factor) train_iter = 0 min_accuracy = 0 for epoch in range(numEpochs): optim_scheduler.step() epoch_loss = 0 numCorrTrain = 0 trainSamples = 0 iterPerEpoch = 0 model.lstm_cell.train(True) model.classifier.train(True) if stage == 2: model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) for i, (inputs, inputsF, targets) in enumerate(train_loader): train_iter += 1 iterPerEpoch += 1 optimizer_fn.zero_grad() inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).to(DEVICE)) labelVariable = Variable(targets.to(DEVICE)) trainSamples += inputs.size(0) output_label, _ = model(inputVariable) loss = loss_fn(output_label, labelVariable) loss.backward() optimizer_fn.step() _, predicted = torch.max(output_label.data, 1) numCorrTrain += (predicted == targets.to(DEVICE)).sum() # evaluating number of correct classifications epoch_loss += loss.data.item() avg_loss = epoch_loss / iterPerEpoch trainAccuracy = (numCorrTrain.data.item() / trainSamples) train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch + 1, avg_loss)) # log file train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch + 1, trainAccuracy)) # log file print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_loss, trainAccuracy)) if (epoch + 1) % VAL_FREQUENCY == 0: model.train(False) val_loss_epoch = 0 val_iter = 0 val_samples = 0 numCorr = 0 for j, (inputs, inputsF, targets) in enumerate(val_loader): val_iter += 1 val_samples += inputs.size(0) inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).to(DEVICE)) labelVariable = Variable(targets.to(DEVICE)) output_label, _ = model(inputVariable) val_loss = loss_fn(output_label, labelVariable) val_loss_epoch += val_loss.data.item() _, predicted = torch.max(output_label.data, 1) numCorr += (predicted == targets.to(DEVICE)).sum() # evaluating number of correct classifications val_accuracy = (numCorr.data.item() / val_samples) avg_val_loss = val_loss_epoch / val_iter print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy)) val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss)) # log file val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy)) # log file if val_accuracy > min_accuracy: save_path_model = ( model_folder + '/model_rgb_state_dict.pth') # every epoch, check if the val accuracy is improved, if so, save that model torch.save(model.state_dict(), save_path_model) # in that way, even if the model overfit, you will get always the best model min_accuracy = val_accuracy # in this way you don't have to care too much about the number of epochs train_log_loss.close() train_log_acc.close() val_log_acc.close() val_log_loss.close()
def main_run(dataset, trainDir, valDir, outDir, stackSize, trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor, decay_step, uniform_sampling, debug): # GTEA 61 num_classes = 61 # Train/Validation/Test split train_splits = ["S1", "S3", "S4"] val_splits = ["S2"] if debug: n_workers = 0 device = 'cpu' else: n_workers = 4 device = 'cuda' min_accuracy = 0 model_folder = os.path.join('./', outDir, dataset, 'flow') # Dir for saving models and log files # Create the dir if os.path.exists(model_folder): print('Dir {} exists!'.format(model_folder)) sys.exit() os.makedirs(model_folder) # Log files train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') #num_workers = 4 # Data loader normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([ Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224), ToTensor(), normalize ]) vid_seq_train = makeDataset(trainDir, train_splits, spatial_transform=spatial_transform, sequence=False, stackSize=stackSize, fmt='.png', uniform_sampling=uniform_sampling) train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, sampler=None, num_workers=n_workers, pin_memory=True) vid_seq_val = makeDataset(trainDir, val_splits, spatial_transform=Compose([ Scale(256), CenterCrop(224), ToTensor(), normalize ]), sequence=False, stackSize=stackSize, fmt='.png', phase='Test', uniform_sampling=uniform_sampling) val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=n_workers, pin_memory=True) valInstances = vid_seq_val.__len__() trainInstances = vid_seq_train.__len__() print('Number of samples in the dataset: training = {} | validation = {}'. 
format(trainInstances, valInstances)) model = flow_resnet34(True, channels=2 * stackSize, num_classes=num_classes) model.train(True) train_params = list(model.parameters()) model.to(device) loss_fn = nn.CrossEntropyLoss() optimizer_fn = torch.optim.SGD(train_params, lr=lr1, momentum=0.9, weight_decay=5e-4) optim_scheduler = torch.optim.lr_scheduler.MultiStepLR( optimizer_fn, milestones=decay_step, gamma=decay_factor) train_iter = 0 for epoch in range(numEpochs): epoch_loss = 0 numCorrTrain = 0 trainSamples = 0 iterPerEpoch = 0 model.train(True) for i, (inputs, targets) in enumerate(train_loader): train_iter += 1 iterPerEpoch += 1 optimizer_fn.zero_grad() inputVariable = inputs.to(device) labelVariable = targets.to(device) trainSamples += inputs.size(0) output_label, _ = model(inputVariable) loss = loss_fn(output_label, labelVariable) loss.backward() optimizer_fn.step() _, predicted = torch.max(output_label.data, 1) numCorrTrain += (predicted == targets.to(device)).sum() epoch_loss += loss.data.item() optim_scheduler.step() avg_loss = epoch_loss / iterPerEpoch trainAccuracy = (numCorrTrain.data.item() / trainSamples) * 100 print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format( epoch + 1, avg_loss, trainAccuracy)) train_log_loss.write('Training loss after {} epoch = {}\n'.format( epoch + 1, avg_loss)) train_log_acc.write('Training accuracy after {} epoch = {}\n'.format( epoch + 1, trainAccuracy)) if (epoch + 1) % 1 == 0: model.train(False) val_loss_epoch = 0 val_iter = 0 val_samples = 0 numCorr = 0 for j, (inputs, targets) in enumerate(val_loader): val_iter += 1 val_samples += inputs.size(0) inputVariable = inputs.to(device) labelVariable = targets.to(device) output_label, _ = model(inputVariable) val_loss = loss_fn(output_label, labelVariable) val_loss_epoch += val_loss.data.item() _, predicted = torch.max(output_label.data, 1) numCorr += (predicted == targets.to(device)).sum() val_accuracy = (numCorr.data.item() / val_samples) * 100 avg_val_loss = val_loss_epoch / val_iter print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format( epoch + 1, avg_val_loss, val_accuracy)) val_log_loss.write('Val Loss after {} epochs = {}\n'.format( epoch + 1, avg_val_loss)) val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format( epoch + 1, val_accuracy)) if val_accuracy > min_accuracy: save_path_model = (model_folder + '/model_flow_state_dict.pth') torch.save(model.state_dict(), save_path_model) min_accuracy = val_accuracy else: if (epoch + 1) % 10 == 0: save_path_model = (model_folder + '/model_flow_state_dict_epoch' + str(epoch + 1) + '.pth') torch.save(model.state_dict(), save_path_model) train_log_loss.close() train_log_acc.close() val_log_acc.close() val_log_loss.close()
def main_run(dataset, root_dir, checkpoint_path, seqLen, testBatchSize, memSize, outPool_size, split): mean=[0.485, 0.456, 0.406] std=[0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) test_split = split seqLen = seqLen memSize = memSize c_cam_classes = outPool_size dataset = dataset testBatchSize = testBatchSize checkpoint_path = checkpoint_path if dataset == 'gtea_61': num_classes = 61 elif dataset == 'gtea_71': num_classes = 71 elif dataset == 'egtea_gaze+': num_classes = 106 else: print('Wrong dataset') sys.exit() dataset_dir = os.path.join(root_dir, dataset) print('Preparing dataset...') if dataset == 'egtea_gaze+': trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, testNumFrames = gen_split_egtea_gazePlus(dataset_dir, test_split) else: trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, testNumFrames, _ = gen_split(dataset_dir, test_split) vid_seq_test = makeDataset(testDatasetF, testLabels, testNumFrames, spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]), fmt='.jpg', seqLen=seqLen) print('Number of test samples = {}'.format(vid_seq_test.__len__())) print("Dataset shape: ", len(vid_seq_test.__getitem__(0)), vid_seq_test.__getitem__(0)[0].shape , end='\n\n\n') test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=testBatchSize, shuffle=False, num_workers=0, pin_memory=True) model = attentionModel(num_classes=num_classes, mem_size=memSize, c_cam_classes=c_cam_classes) if os.path.exists(checkpoint_path): print('Loading weights from checkpoint file {}'.format(checkpoint_path)) else: print('Checkpoint file {} does not exist'.format(checkpoint_path)) sys.exit() last_checkpoint = torch.load(checkpoint_path) #, map_location=torch.device('cpu')) model.load_state_dict(last_checkpoint['model_state_dict']) model.cuda() model.train(False) model.eval() print('Testing...') test_iter = 0 test_samples = 0 numCorr = 0 for j, (inputs, targets) in tqdm(enumerate(test_loader)): test_iter += 1 test_samples += inputs.size(0) with torch.no_grad(): print(inputs.shape, targets.shape) inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda()) output_label, _ = model(inputVariable) del inputVariable _, predicted = torch.max(output_label.data, 1) numCorr += (predicted == targets.cuda()).sum() test_accuracy = (numCorr.cpu().item() / test_samples) * 100 print('Test Accuracy after = {}%'.format(test_accuracy))
def get_val_utils(opt): normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm, opt.no_std_norm) spatial_transform = [ Resize(opt.sample_size), CenterCrop(opt.sample_size), ToTensor() ] if opt.input_type == 'flow': spatial_transform.append(PickFirstChannels(n=2)) spatial_transform.extend([ScaleValue(opt.value_scale), normalize]) spatial_transform = Compose(spatial_transform) temporal_transform = [] if opt.sample_t_stride > 1: temporal_transform.append(TemporalSubsampling(opt.sample_t_stride)) temporal_transform.append( TemporalEvenCrop(opt.sample_duration, opt.n_val_samples)) temporal_transform = TemporalCompose(temporal_transform) val_data_checkpoint_path = opt.result_path / Path('val_data_' + opt.dataset + '.data') val_collate_checkpoint_path = opt.result_path / Path('val_coll_' + opt.dataset + '.data') if os.path.exists(val_data_checkpoint_path) and os.path.exists( val_collate_checkpoint_path) and opt.save_load_data_checkpoint: with open(val_data_checkpoint_path, 'rb') as filehandle: val_data = pickle.load(filehandle) with open(val_collate_checkpoint_path, 'rb') as filehandle: collate_fn = pickle.load(filehandle) else: val_data, collate_fn = get_validation_data( opt.video_path, opt.annotation_path, opt.dataset, opt.input_type, opt.file_type, spatial_transform, temporal_transform) if opt.save_load_data_checkpoint: with open(val_data_checkpoint_path, 'wb') as filehandle: pickle.dump(val_data, filehandle) with open(val_collate_checkpoint_path, 'wb') as filehandle: pickle.dump(collate_fn, filehandle) if opt.distributed: val_sampler = torch.utils.data.distributed.DistributedSampler( val_data, shuffle=False) else: val_sampler = None val_loader = torch.utils.data.DataLoader(val_data, batch_size=(opt.batch_size // opt.n_val_samples), shuffle=False, num_workers=opt.n_threads, pin_memory=True, sampler=val_sampler, worker_init_fn=worker_init_fn, collate_fn=collate_fn) if opt.is_master_node: val_logger = Logger(opt.result_path / 'val.log', ['epoch', 'loss', 'acc']) else: val_logger = None return val_loader, val_logger
def main_run(stage, train_data_dir, val_data_dir, stage1Dict, stage1Dict_rgb, stage1Dict_fc, out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize): #dataset = 'gtea61' num_classes = 61 model_folder = os.path.join( './', out_dir, 'attConvLSTMDoubleResnet', str(seqLen), 'stage' + str(stage)) # Dir for saving models and log files # Create the dir if os.path.exists(model_folder): print('Directory {} exists!'.format(model_folder)) sys.exit() os.makedirs(model_folder) # Log files writer = SummaryWriter(model_folder) train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') # Data loader normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([ Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224), ToTensor(), normalize ]) vid_seq_train = makeDataset(train_data_dir, seqLen=seqLen, fmt='.png', users=['S1', 'S3', 'S4'], spatial_transform=spatial_transform) trainInstances = vid_seq_train.__len__() train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, num_workers=4, pin_memory=True) if val_data_dir is not None: vid_seq_val = makeDataset(val_data_dir, seqLen=seqLen, fmt='.png', users=['S2'], train=False, spatial_transform=Compose([ Scale(256), CenterCrop(224), ToTensor(), normalize ])) valInstances = vid_seq_val.__len__() val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True) train_params = [] model = twoStreamFlowCol(num_classes=num_classes, memSize=memSize, frameModel=stage1Dict_rgb, flowModel=stage1Dict_fc) model.train(False) for params in model.parameters(): params.requires_grad = False model.train(False) train_params = [] for params in model.classifier.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.lstm_cell.parameters(): train_params += [params] params.requires_grad = True for params in model.frameModel.resNet.layer4[0].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.resNet.layer4[0].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.resNet.layer4[1].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.resNet.layer4[1].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.resNet.layer4[2].conv1.parameters(): params.requires_grad = True train_params += [params] # for params in model.frameModel.resNet.layer4[2].conv2.parameters(): params.requires_grad = True train_params += [params] # for params in model.frameModel.resNet.fc.parameters(): params.requires_grad = True train_params += [params] for params in model.classifier.parameters(): params.requires_grad = True train_params += [params] for params in model.flowModel.lstm_cell.parameters(): train_params += [params] params.requires_grad = True for params in model.flowModel.resNet.layer4[0].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.flowModel.resNet.layer4[0].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.flowModel.resNet.layer4[1].conv1.parameters(): 
params.requires_grad = True train_params += [params] for params in model.flowModel.resNet.layer4[1].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.flowModel.resNet.layer4[2].conv1.parameters(): params.requires_grad = True train_params += [params] # for params in model.flowModel.resNet.layer4[2].conv2.parameters(): params.requires_grad = True train_params += [params] # for params in model.flowModel.resNet.fc.parameters(): params.requires_grad = True train_params += [params] model.cuda() trainSamples = vid_seq_train.__len__() min_accuracy = 0 loss_fn = nn.CrossEntropyLoss() optimizer_fn = torch.optim.SGD(train_params, lr=lr1, momentum=0.9, weight_decay=5e-4) optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn, step_size=decay_step, gamma=decay_factor) train_iter = 0 min_accuracy = 0 for epoch in range(numEpochs): epoch_loss = 0 numCorrTrain = 0 trainSamples = 0 iterPerEpoch = 0 model.classifier.train(True) writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1) #for i, (inputs, targets) in enumerate(train_loader): for inputs, inputsSN, targets in train_loader: train_iter += 1 iterPerEpoch += 1 optimizer_fn.zero_grad() inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda()) inputSNVariable = Variable(inputsSN.permute(1, 0, 2, 3, 4).cuda()) labelVariable = Variable(targets.cuda()) trainSamples += inputs.size(0) output_label, _ = model(inputVariable, inputSNVariable) loss = loss_fn(output_label, labelVariable) loss.backward() optimizer_fn.step() _, predicted = torch.max(output_label.data, 1) numCorrTrain += (predicted == targets.cuda()).sum() epoch_loss += loss.item() optim_scheduler.step() avg_loss = epoch_loss / iterPerEpoch trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100 print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format( epoch + 1, avg_loss, trainAccuracy)) writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1) writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1) train_log_loss.write('Training loss after {} epoch = {}\n'.format( epoch + 1, avg_loss)) train_log_acc.write('Training accuracy after {} epoch = {}\n'.format( epoch + 1, trainAccuracy)) if val_data_dir is not None: model.train(False) val_loss_epoch = 0 val_iter = 0 val_samples = 0 numCorr = 0 with torch.no_grad(): #for j, (inputs, targets) in enumerate(val_loader): for inputs, inputsSN, targets in val_loader: val_iter += 1 val_samples += inputs.size(0) inputVariable = Variable( inputs.permute(1, 0, 2, 3, 4).cuda()) inputSNVariable = Variable( inputsSN.permute(1, 0, 2, 3, 4).cuda()) labelVariable = Variable(targets.cuda(non_blocking=True)) #labelVariable = Variable(targets.cuda()) output_label, _ = model(inputVariable, inputSNVariable) val_loss = loss_fn(output_label, labelVariable) val_loss_epoch += val_loss.item() _, predicted = torch.max(output_label.data, 1) numCorr += (predicted == targets.cuda()).sum() val_accuracy = torch.true_divide(numCorr, val_samples) * 100 avg_val_loss = val_loss_epoch / val_iter print('Val: Epoch = {} | Loss = {} | Accuracy = {}'.format( epoch + 1, avg_val_loss, val_accuracy)) writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1) writer.add_scalar('val/accuracy', val_accuracy, epoch + 1) val_log_loss.write('Val Loss after {} epochs = {}\n'.format( epoch + 1, avg_val_loss)) val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format( epoch + 1, val_accuracy)) if val_accuracy > min_accuracy: save_path_model = (model_folder + '/model_rgb_state_dict.pth') torch.save(model.state_dict(),
save_path_model) min_accuracy = val_accuracy train_log_loss.close() train_log_acc.close() val_log_acc.close() val_log_loss.close() writer.export_scalars_to_json(model_folder + "/all_scalars.json") writer.close()
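# The long runs of per-layer loops above (flipping requires_grad and appending to
# train_params for both the frame and the flow stream) all follow one pattern and could
# be collapsed into a small helper. A minimal sketch under that assumption; the helper
# name and the toy model below are illustrative, not part of the original code:
import torch.nn as nn

def collect_trainable_params(modules):
    """Enable gradients for every parameter of the given modules and return them as a list."""
    train_params = []
    for module in modules:
        for p in module.parameters():
            p.requires_grad = True
            train_params.append(p)
    return train_params

# With the real model one would pass, e.g.,
# [model.classifier, model.frameModel.lstm_cell, model.frameModel.resNet.layer4[0].conv1, ...]
toy = nn.Sequential(nn.Linear(8, 8), nn.Linear(8, 2))
for p in toy.parameters():
    p.requires_grad = False
print(len(collect_trainable_params([toy[1]])))  # 2: weight and bias of the second Linear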
lr=opt.learning_rate, momentum=opt.momentum, # dampening=dampening, weight_decay=opt.weight_decay, nesterov=True) scheduler = lr_scheduler.MultiStepLR(optimizer, [15, 25, 40, 45, 50, 55, 60], gamma=0.1) if not opt.no_val: ##-------------------------------------------------------------------------------------------- if opt.model == 'I3D': spatial_transform = Compose([ Scale((256, 256)), CenterCrop(224), ToTensor(opt.norm_value), norm_method ]) temporal_transform = LoopPadding(0) target_transform = ClassLabel() validation_data = get_validation_set(opt, spatial_transform, temporal_transform, target_transform) val_loader = torch.utils.data.DataLoader(validation_data, batch_size=1, shuffle=False, num_workers=opt.n_threads, pin_memory=True) elif opt.model == 'resnet_50': spatial_transform = Compose([ Scale(256),
def main_run(dataset, flowModel, rgbModel, stackSize, seqLen, memSize, trainDatasetDir, valDatasetDir, outDir, trainBatchSize, valBatchSize, lr1, numEpochs, decay_step, decay_factor, uniformSampling): # GTEA 61 num_classes = 61 # Train/Validation/Test split train_splits = ["S1", "S3", "S4"] val_splits = ["S2"] directory = trainDatasetDir model_folder = os.path.join( './', outDir, dataset, 'twoStream') # Dir for saving models and log files # Create the dir if os.path.exists(model_folder): print('Dir {} exists!'.format(model_folder)) sys.exit() os.makedirs(model_folder) # Log files train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) spatial_transform = Compose([ Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224), ToTensor(), normalize ]) vid_seq_train = makeDataset(directory, train_splits, spatial_transform=spatial_transform, sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', seqLen=seqLen, uniform_sampling=uniformSampling) train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, num_workers=4, pin_memory=True) vid_seq_val = makeDataset(directory, val_splits, spatial_transform=Compose([ Scale(256), CenterCrop(224), ToTensor(), normalize ]), sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', phase='Test', seqLen=seqLen, uniform_sampling=uniformSampling) val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True) valSamples = vid_seq_val.__len__() model = twoStreamAttentionModel(flowModel=flowModel, frameModel=rgbModel, stackSize=stackSize, memSize=memSize, num_classes=num_classes) for params in model.parameters(): params.requires_grad = False model.train(False) train_params = [] for params in model.classifier.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.lstm_cell.parameters(): train_params += [params] params.requires_grad = True for params in model.frameModel.resNet.layer4[0].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.resNet.layer4[0].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.resNet.layer4[1].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.resNet.layer4[1].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.resNet.layer4[2].conv1.parameters(): params.requires_grad = True train_params += [params] # for params in model.frameModel.resNet.layer4[2].conv2.parameters(): params.requires_grad = True train_params += [params] # for params in model.frameModel.resNet.fc.parameters(): params.requires_grad = True train_params += [params] base_params = [] for params in model.flowModel.layer4.parameters(): base_params += [params] params.requires_grad = True model.cuda() trainSamples = vid_seq_train.__len__() min_accuracy = 0 loss_fn = nn.CrossEntropyLoss() optimizer_fn = torch.optim.SGD([ { 'params': train_params }, { 'params': base_params, 'lr': 1e-4 }, ], lr=lr1, momentum=0.9, weight_decay=5e-4) optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn, 
step_size=decay_step, gamma=decay_factor) train_iter = 0 for epoch in range(numEpochs): epoch_loss = 0 numCorrTrain = 0 iterPerEpoch = 0 model.classifier.train(True) model.flowModel.layer4.train(True) for j, (inputFlow, inputFrame, targets) in enumerate(train_loader): train_iter += 1 iterPerEpoch += 1 optimizer_fn.zero_grad() inputVariableFlow = inputFlow.to(DEVICE) inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE) labelVariable = targets.to(DEVICE) output_label = model(inputVariableFlow, inputVariableFrame) loss = loss_fn(torch.log_softmax(output_label, dim=1), labelVariable) loss.backward() optimizer_fn.step() _, predicted = torch.max(output_label.data, 1) numCorrTrain += (predicted == targets.cuda()).sum() epoch_loss += loss.data.item() avg_loss = epoch_loss / iterPerEpoch trainAccuracy = (numCorrTrain.item() / trainSamples) * 100 print('Average training loss after {} epoch = {} '.format( epoch + 1, avg_loss)) print('Training accuracy after {} epoch = {}% '.format( epoch + 1, trainAccuracy)) train_log_loss.write('Training loss after {} epoch = {}\n'.format( epoch + 1, avg_loss)) train_log_acc.write('Training accuracy after {} epoch = {}\n'.format( epoch + 1, trainAccuracy)) # Validation Phase #if valDatasetDir is not None: if (epoch + 1) % 1 == 0: model.train(False) val_loss_epoch = 0 val_iter = 0 numCorr = 0 for j, (inputFlow, inputFrame, targets) in enumerate(val_loader): val_iter += 1 inputVariableFlow = inputFlow.to(DEVICE) inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE) labelVariable = targets.to(DEVICE) output_label = model(inputVariableFlow, inputVariableFrame) loss = loss_fn(torch.log_softmax(output_label, dim=1), labelVariable) val_loss_epoch += loss.data.item() _, predicted = torch.max(output_label.data, 1) numCorr += (predicted == labelVariable.data).sum() val_accuracy = (numCorr.item() / valSamples) * 100 avg_val_loss = val_loss_epoch / val_iter print('Val Loss after {} epochs, loss = {}'.format( epoch + 1, avg_val_loss)) print('Val Accuracy after {} epochs = {}%'.format( epoch + 1, val_accuracy)) val_log_loss.write('Val Loss after {} epochs = {}\n'.format( epoch + 1, avg_val_loss)) val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format( epoch + 1, val_accuracy)) if val_accuracy > min_accuracy: save_path_model = (model_folder + '/model_twoStream_state_dict.pth') torch.save(model.state_dict(), save_path_model) min_accuracy = val_accuracy #else: # if (epoch + 1) % 10 == 0: # save_path_model = (model_folder + '/model_twoStream_state_dict_epoch' + str(epoch + 1) + '.pth') # torch.save(model.state_dict(), save_path_model) optim_scheduler.step() train_log_loss.close() train_log_acc.close() val_log_acc.close() val_log_loss.close()
def main_run(dataset, stage, root_dir, out_dir, stage1_dict, seqLen, trainBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize, outPool_size, split, evalInterval, regression, rloss, debug): if debug: n_workers = 0 n_workers_test = 0 device = 'cpu' else: n_workers = 4 n_workers_test = 2 device = 'cuda' # Train/Validation/Test split train_splits = ["S1", "S3", "S4"] val_splits = ["S2"] test_split = split mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) stage = stage #test_split = split seqLen = seqLen memSize = memSize c_cam_classes = outPool_size dataset = dataset best_acc = 0 if stage == 1: trainBatchSize = trainBatchSize testBatchSize = trainBatchSize lr1 = lr1 decay_factor = decay_factor decay_step = decay_step numEpochs = numEpochs elif stage == 2 or stage == 3: trainBatchSize = trainBatchSize testBatchSize = trainBatchSize lr1 = lr1 decay_factor = decay_factor decay_step = decay_step numEpochs = numEpochs num_classes = 61 dataset_dir = root_dir #model_folder = os.path.join('.', out_dir, dataset, str(test_split)) model_folder = os.path.join('./', out_dir, 'stage' + str(stage)) if not os.path.exists(model_folder): os.makedirs(model_folder) else: print('Directory {} exists!'.format(model_folder)) sys.exit() note_fl = open(model_folder + '/note.txt', 'w') note_fl.write('Number of Epochs = {}\n' 'lr = {}\n' 'Train Batch Size = {}\n' 'Sequence Length = {}\n' 'Decay steps = {}\n' 'Decay factor = {}\n' 'Memory size = {}\n' 'Memory cam classes = {}\n'.format(numEpochs, lr1, trainBatchSize, seqLen, decay_step, decay_factor, memSize, c_cam_classes)) note_fl.close() # Log files writer = SummaryWriter(model_folder) train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') train_log_loss_batch = open((model_folder + '/train_log_loss_batch.txt'), 'w') test_log_loss = open((model_folder + '/test_log_loss.txt'), 'w') test_log_acc = open((model_folder + '/test_log_acc.txt'), 'w') # Dataloaders spatial_transform = Compose([ Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224), ToTensor(), normalize ]) print('Preparing dataset...') # Data loader normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([ Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224), # ToTensor(), # normalize ]) transform_rgb = Compose([ToTensor(), normalize]) transform_MS = Compose([Resize((7, 7)), ToTensor()]) vid_seq_train = makeDataset(dataset_dir, splits=train_splits, spatial_transform=spatial_transform, transform_rgb=transform_rgb, transform_MS=transform_MS, seqLen=seqLen, fmt='.png', regression=regression) train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, num_workers=n_workers, pin_memory=True) vid_seq_test = makeDataset(dataset_dir, splits=val_splits, spatial_transform=Compose( [Scale(256), CenterCrop(224)]), transform_rgb=transform_rgb, transform_MS=transform_MS, seqLen=seqLen, fmt='.png', regression=regression, verbose=False) test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=testBatchSize, shuffle=False, num_workers=n_workers, pin_memory=True) print('here') print('Number of train samples = {}'.format(vid_seq_train.__len__())) print('Number of test samples = {}'.format(vid_seq_test.__len__())) train_params = [] if stage == 1: if regression: model = attentionModel(num_classes=num_classes, mem_size=memSize, 
n_channels=1) else: model = attentionModel(num_classes=num_classes, mem_size=memSize) model.train(False) for params in model.parameters(): params.requires_grad = False elif stage == 2 or stage == 3: if regression: model = attentionModel(num_classes=num_classes, mem_size=memSize, n_channels=1, c_cam_classes=c_cam_classes) else: model = attentionModel(num_classes=num_classes, mem_size=memSize, c_cam_classes=c_cam_classes) #model = attentionModel(num_classes=num_classes, mem_size=memSize, c_cam_classes=c_cam_classes) if stage == 2: checkpoint_path = os.path.join( stage1_dict, 'last_checkpoint_stage' + str(1) + '.pth.tar') elif stage == 3: checkpoint_path = os.path.join( stage1_dict, 'last_checkpoint_stage' + str(2) + '.pth.tar') if os.path.exists(checkpoint_path): print('Loading weights from checkpoint file {}'.format( checkpoint_path)) else: print('Checkpoint file {} does not exist'.format(checkpoint_path)) sys.exit() last_checkpoint = torch.load(checkpoint_path) model.load_state_dict(last_checkpoint['model_state_dict'], strict=False) model.train(False) for params in model.parameters(): params.requires_grad = False for params in model.resNet.layer4[0].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[0].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[2].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[2].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.fc.parameters(): params.requires_grad = True train_params += [params] # Add params from ms_module if stage == 2: for params in model.ms_module.parameters(): params.requires_grad = True train_params += [params] for params in model.lsta_cell.parameters(): params.requires_grad = True train_params += [params] for params in model.classifier.parameters(): params.requires_grad = True train_params += [params] model.classifier.train(True) model.ms_module.train(True) model.to(device) loss_fn = nn.CrossEntropyLoss() if regression: if rloss == 'MSE': # Mean Squared Error loss loss_ms_fn = nn.MSELoss() # it should work elif rloss == 'L1': # L1 loss loss_ms_fn = nn.L1Loss() elif rloss == 'SmoothL1': # Huber Loss or Smooth L1 Loss loss_ms_fn = nn.SmoothL1Loss() elif rloss == 'KLdiv': # Kullback-Leiber Loss loss_ms_fn = nn.KLDivLoss() else: # classification loss_ms_fn = nn.CrossEntropyLoss() # TODO: check paper Planamente optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=5e-4, eps=1e-4) optim_scheduler = torch.optim.lr_scheduler.MultiStepLR( optimizer_fn, milestones=decay_step, gamma=decay_factor) train_iter = 0 for epoch in range(numEpochs): #optim_scheduler.step() epoch_loss = 0 numCorrTrain = 0 trainSamples = 0 iterPerEpoch = 0 # model.classifier.train(True) model.lsta_cell.train(True) model.classifier.train(True) if stage == 2: model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) model.ms_module.train(True) writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1) for i, 
(inputs, inputsMS, targets) in enumerate(train_loader): # Inputs: # - inputsRGB : the rgb frame input # Labels : # - inputsMS : the motion task label # - targets : output train_iter += 1 iterPerEpoch += 1 optimizer_fn.zero_grad() inputVariable = inputs.permute(1, 0, 2, 3, 4).to(device) labelVariable = targets.to(device) msVariable = inputsMS.to(device) trainSamples += inputs.size(0) output_label, _, output_ms = model(inputVariable, device) loss_c = loss_fn(output_label, labelVariable) if stage == 2: if regression: msVariable = torch.reshape( msVariable, (seqLen * 7 * 7, msVariable.size(0))) output_ms = torch.sigmoid(output_ms) output_ms = torch.reshape( output_ms, (seqLen * 7 * 7, output_ms.size(0))) else: # classification task msVariable = torch.reshape( msVariable, (seqLen * 7 * 7, msVariable.size(0))).long() output_ms = torch.reshape( output_ms, (seqLen * 7 * 7, 2, output_ms.size(0))) loss_ms = loss_ms_fn(output_ms, msVariable) loss = loss_c + loss_ms else: loss = loss_c loss.backward() optimizer_fn.step() _, predicted = torch.max(output_label.data, 1) numCorrTrain += (predicted == targets.to(device)).sum() #print('Training loss after {} iterations = {} '.format(train_iter, loss.data.item())) #train_log_loss_batch.write('Training loss after {} iterations = {}\n'.format(train_iter, loss.data.item())) #writer.add_scalar('train/iter_loss', loss.data.item(), train_iter) epoch_loss += loss.data.item() avg_loss = epoch_loss / iterPerEpoch trainAccuracy = (numCorrTrain / trainSamples) * 100 print('Average training loss after {} epoch = {} '.format( epoch + 1, avg_loss)) print('Training accuracy after {} epoch = {}% '.format( epoch + 1, trainAccuracy)) writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1) writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1) train_log_loss.write('Training loss after {} epoch = {}\n'.format( epoch + 1, avg_loss)) train_log_acc.write('Training accuracy after {} epoch = {}\n'.format( epoch + 1, trainAccuracy)) save_path_model = os.path.join( model_folder, 'last_checkpoint_stage' + str(stage) + '.pth.tar') save_file = { 'epoch': epoch + 1, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer_fn.state_dict(), 'best_acc': best_acc, } torch.save(save_file, save_path_model) if (epoch + 1) % evalInterval == 0: #print('Testing...') model.train(False) test_loss_epoch = 0 test_iter = 0 test_samples = 0 numCorr = 0 for j, (inputs, inputsMS, targets) in enumerate(test_loader): #print('testing inst = {}'.format(j)) test_iter += 1 test_samples += inputs.size(0) inputVariable = inputs.permute(1, 0, 2, 3, 4).to(device) labelVariable = targets.to(device) msVariable = inputsMS.to(device) output_label, _, output_ms = model(inputVariable, device) test_loss_c = loss_fn(output_label, labelVariable) if stage == 2: if regression: msVariable = torch.reshape( msVariable, (seqLen * 7 * 7, msVariable.size(0))) output_ms = torch.sigmoid(output_ms) output_ms = torch.reshape( output_ms, (seqLen * 7 * 7, output_ms.size(0))) else: # classification task msVariable = torch.reshape( msVariable, (seqLen * 7 * 7, msVariable.size(0))).long() output_ms = torch.reshape( output_ms, (seqLen * 7 * 7, 2, output_ms.size(0))) test_loss_ms = loss_ms_fn(output_ms, msVariable) test_loss = test_loss_c + test_loss_ms else: test_loss = test_loss_c test_loss_epoch += test_loss.data.item() _, predicted = torch.max(output_label.data, 1) numCorr += (predicted == targets.to(device)).sum() test_accuracy = (numCorr / test_samples) * 100 avg_test_loss = test_loss_epoch / test_iter 
print('Test Loss after {} epochs, loss = {}'.format( epoch + 1, avg_test_loss)) print('Test Accuracy after {} epochs = {}%'.format( epoch + 1, test_accuracy)) writer.add_scalar('test/epoch_loss', avg_test_loss, epoch + 1) writer.add_scalar('test/accuracy', test_accuracy, epoch + 1) test_log_loss.write('Test Loss after {} epochs = {}\n'.format( epoch + 1, avg_test_loss)) test_log_acc.write('Test Accuracy after {} epochs = {}%\n'.format( epoch + 1, test_accuracy)) if test_accuracy > best_acc: best_acc = test_accuracy save_path_model = os.path.join( model_folder, 'best_checkpoint_stage' + str(stage) + '.pth.tar') save_file = { 'epoch': epoch + 1, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer_fn.state_dict(), 'best_acc': best_acc, } torch.save(save_file, save_path_model) optim_scheduler.step() train_log_loss.close() train_log_acc.close() test_log_acc.close() train_log_loss_batch.close() test_log_loss.close() writer.export_scalars_to_json(model_folder + "/all_scalars.json") writer.close()
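# The checkpoints written above store the epoch, the model and optimizer state dicts and
# the best accuracy under fixed keys. A minimal resume sketch, assuming `model` and the
# optimizer are constructed exactly as in the training code above; the function name is
# illustrative:
import os
import torch

def load_stage_checkpoint(model, optimizer, model_folder, stage, device='cuda'):
    """Restore training state from last_checkpoint_stage<stage>.pth.tar, if it exists."""
    ckpt_path = os.path.join(model_folder, 'last_checkpoint_stage' + str(stage) + '.pth.tar')
    if not os.path.exists(ckpt_path):
        return 0, 0  # nothing to resume: start at epoch 0 with best_acc 0
    checkpoint = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return checkpoint['epoch'], checkpoint['best_acc']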
def main(): parser = argparse.ArgumentParser(description="Run model against images") parser.add_argument( '--input-glob', default= 'data/kinetics_videos/jpg/yoga/0wHOYxjRmlw_000041_000051/image_000{41,42,43,44,45,46,47,48,49,50,41,42,43,44,45,46}.jpg', help="inputs") parser.add_argument("--depth", default="50", help="which model depth") args = parser.parse_args() model_file = model_files[args.depth] model_depth = int(args.depth) model = resnet.generate_model(model_depth=model_depth, n_classes=700, n_input_channels=3, shortcut_type="B", conv1_t_size=7, conv1_t_stride=1, no_max_pool=False, widen_factor=1.0) # model = load_pretrained_model(model, args.model, "resnet", 700) checkpoint = torch.load(model_file, map_location='cpu') arch = '{}-{}'.format("resnet", model_depth) print(arch, checkpoint['arch']) assert arch == checkpoint['arch'] if hasattr(model, 'module'): # I think this only for legacy models model.module.load_state_dict(checkpoint['state_dict']) else: model.load_state_dict(checkpoint['state_dict']) model.eval() image_clips = [] files = real_glob(args.input_glob) files = extend_to_length(files, 16) print(files) for f in files: img = Image.open(f).convert("RGB") image_clips.append(img) # print("EARLY", image_clips[0][0:4,0:4,0]) mean = [0.4345, 0.4051, 0.3775] std = [0.2768, 0.2713, 0.2737] normalize = Normalize(mean, std) sample_size = 112 spatial_transform = [Resize(sample_size)] spatial_transform.append(CenterCrop(sample_size)) spatial_transform.append(ToTensor()) spatial_transform.extend([ScaleValue(1), normalize]) spatial_transform = Compose(spatial_transform) # c = spatial_transform(image_clips[0]) # c.save("raw.png") model_clips = [] clip = [spatial_transform(img) for img in image_clips] model_clips.append(torch.stack(clip, 0).permute(1, 0, 2, 3)) model_clips = torch.stack(model_clips, 0) print("Final", model_clips.shape) print("PEEK", model_clips[0, 0, 0, 0:4, 0:4]) with torch.no_grad(): outputs = model(model_clips) print(outputs[0][0:10]) outputs = F.softmax(outputs, dim=1).cpu() sorted_scores, locs = torch.topk(outputs[0], k=3) print(locs[0]) video_results = [] for i in range(sorted_scores.size(0)): video_results.append({ 'label': magic_labels_700[locs[i].item()], 'score': sorted_scores[i].item() }) print(video_results)
center = 1 openpose_transform = { 'train': MultiScaleTorsoRandomCrop(scales, args.img_size), 'val': MultiScaleTorsoRandomCrop(np.linspace(center, center, num=1), args.img_size, centercrop=True) } spatial_transform = { 'train': Compose([ Scale(args.img_size), CenterCrop(args.img_size), RandomHorizontalFlip(), ColorJitter(brightness=0.1), ToTensor(1), Normalize(args.mean, args.std) ]), 'val': Compose([ Scale(args.img_size), CenterCrop(args.img_size), ToTensor(1), Normalize(args.mean, args.std) ]) } temporal_transform = {
def main_run(dataset, trainDir, valDir, outDir, stackSize, trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor, decay_step): if dataset == 'gtea61': num_classes = 61 elif dataset == 'gtea71': num_classes = 71 elif dataset == 'gtea_gaze': num_classes = 44 elif dataset == 'egtea': num_classes = 106 else: print('Dataset not found') sys.exit() min_accuracy = 0 model_folder = os.path.join('./', outDir, dataset, 'flow') # Dir for saving models and log files # Create the dir if os.path.exists(model_folder): print('Dir {} exists!'.format(model_folder)) import shutil shutil.rmtree(model_folder) # clear the stale experiment dir so it can be recreated #sys.exit() os.makedirs(model_folder) # Log files writer = SummaryWriter(model_folder) train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') # Data loader normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224), ToTensor(), normalize]) vid_seq_train = makeDataset(trainDir, spatial_transform=spatial_transform, sequence=False, stackSize=stackSize, fmt='.png') train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, sampler=None, num_workers=4, pin_memory=True) valInstances=0 if valDir is not None: vid_seq_val = makeDataset(valDir, spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]), sequence=False, stackSize=stackSize, fmt='.png', phase='Test') val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True) valInstances = vid_seq_val.__len__() trainInstances = vid_seq_train.__len__() print('Number of samples in the dataset: training = {} | validation = {}'.format(trainInstances, valInstances)) model = flow_resnet34(True, channels=2*stackSize, num_classes=num_classes) model.train(True) train_params = list(model.parameters()) model.cuda() loss_fn = nn.CrossEntropyLoss() optimizer_fn = torch.optim.SGD(train_params, lr=lr1, momentum=0.9, weight_decay=5e-4) optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step, gamma=decay_factor) train_iter = 0 for epoch in range(numEpochs): epoch_loss = 0 numCorrTrain = 0 trainSamples = 0 iterPerEpoch = 0 model.train(True) writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1) for i, (inputs, targets) in enumerate(train_loader): train_iter += 1 iterPerEpoch += 1 optimizer_fn.zero_grad() inputVariable = Variable(inputs.cuda()) labelVariable = Variable(targets.cuda()) trainSamples += inputs.size(0) output_label, _ = model(inputVariable) loss = loss_fn(output_label, labelVariable) loss.backward() optimizer_fn.step() _, predicted = torch.max(output_label.data, 1) numCorrTrain += (predicted == targets.cuda()).sum() epoch_loss += loss.item() optim_scheduler.step() # step the scheduler once per epoch, after the optimizer updates avg_loss = epoch_loss/iterPerEpoch trainAccuracy = (numCorrTrain.item() / trainSamples) * 100 print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_loss, trainAccuracy)) writer.add_scalar('train/epoch_loss', avg_loss, epoch+1) writer.add_scalar('train/accuracy', trainAccuracy, epoch+1) train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch+1, avg_loss)) train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch+1, trainAccuracy)) if valDir is not None: if (epoch+1) % 1 == 0:
model.train(False) val_loss_epoch = 0 val_iter = 0 val_samples = 0 numCorr = 0 with torch.no_grad(): for j, (inputs, targets) in enumerate(val_loader): val_iter += 1 val_samples += inputs.size(0) inputVariable = Variable(inputs.cuda()) labelVariable = Variable(targets.cuda(non_blocking=True)) output_label, _ = model(inputVariable) val_loss = loss_fn(output_label, labelVariable) val_loss_epoch += val_loss.item() _, predicted = torch.max(output_label.data, 1) numCorr += (predicted == targets.cuda()).sum() val_accuracy = (numCorr.item() / val_samples) * 100 avg_val_loss = val_loss_epoch / val_iter print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy)) writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1) writer.add_scalar('val/accuracy', val_accuracy, epoch + 1) val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss)) val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy)) if val_accuracy > min_accuracy: save_path_model = (model_folder + '/model_flow_state_dict.pth') torch.save(model.state_dict(), save_path_model) min_accuracy = val_accuracy else: if (epoch+1) % 10 == 0: save_path_model = (model_folder + '/model_flow_state_dict_epoch' + str(epoch+1) + '.pth') torch.save(model.state_dict(), save_path_model) train_log_loss.close() train_log_acc.close() val_log_acc.close() val_log_loss.close() writer.export_scalars_to_json(model_folder + "/all_scalars.json") writer.close()
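# The validation loop above originally predates torch.no_grad(); on current PyTorch the
# same evaluation can be written without Variable/volatile at all. A minimal sketch,
# assuming a model that returns (logits, features) and a loader yielding (inputs,
# targets); the function name is illustrative:
import torch
import torch.nn as nn

def evaluate(model, loader, device='cuda'):
    """Return (average loss, accuracy in %) over the loader with gradients disabled."""
    loss_fn = nn.CrossEntropyLoss()
    model.eval()
    total_loss, correct, samples = 0.0, 0, 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            logits, _ = model(inputs)
            total_loss += loss_fn(logits, targets).item()
            correct += (logits.argmax(dim=1) == targets).sum().item()
            samples += targets.size(0)
    return total_loss / len(loader), correct / samples * 100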
def main_run(dataset, model_state_dict, dataset_dir, stackSize, numSeg): if dataset == 'gtea61': num_classes = 61 elif dataset == 'gtea71': num_classes = 71 elif dataset == 'gtea_gaze': num_classes = 44 elif dataset == 'egtea': num_classes = 106 mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) spatial_transform = Compose( [Scale(256), CenterCrop(224), ToTensor(), normalize]) vid_seq_test = makeDataset(dataset_dir, spatial_transform=spatial_transform, sequence=True, numSeg=numSeg, stackSize=stackSize, fmt='.jpg', phase='Test') test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1, shuffle=False, num_workers=2, pin_memory=True) model = flow_resnet34(False, channels=2 * stackSize, num_classes=num_classes) model.load_state_dict(torch.load(model_state_dict)) for params in model.parameters(): params.requires_grad = False model.train(False) model.cuda() test_samples = vid_seq_test.__len__() print('Number of samples = {}'.format(test_samples)) print('Evaluating...') numCorr = 0 true_labels = [] predicted_labels = [] for j, (inputs, targets) in enumerate(test_loader): inputVariable = Variable(inputs[0].cuda(), volatile=True) output_label, _ = model(inputVariable) output_label_mean = torch.mean(output_label.data, 0, True) _, predicted = torch.max(output_label_mean, 1) numCorr += (predicted == targets[0]).sum() true_labels.append(targets) predicted_labels.append(predicted) test_accuracy = (numCorr / test_samples) * 100 print('Test Accuracy = {}%'.format(test_accuracy)) cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float) cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis] ticks = np.linspace(0, 60, num=61) plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary') plt.colorbar() plt.xticks(ticks, fontsize=6) plt.yticks(ticks, fontsize=6) plt.grid(True) plt.clim(0, 1) plt.savefig(dataset + '-flow.jpg', bbox_inches='tight') plt.show()
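# sklearn's confusion_matrix expects flat integer arrays, while the evaluation above
# collects per-batch tensors (the predictions may still live on the GPU). A minimal
# conversion sketch, assuming the same true_labels / predicted_labels lists; the helper
# name is illustrative:
import torch
from sklearn.metrics import confusion_matrix

def tensors_to_confusion_matrix(true_labels, predicted_labels):
    """Concatenate per-batch label tensors, move them to the CPU and build the matrix."""
    y_true = torch.cat([t.view(-1).cpu() for t in true_labels]).numpy()
    y_pred = torch.cat([p.view(-1).cpu() for p in predicted_labels]).numpy()
    return confusion_matrix(y_true, y_pred)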
def main_run(model_state_dict, dataset_dir, seqLen, memSize, out_dir): model_folder = os.path.join('./', out_dir, 'attConvLSTMDoubleResnet', str(seqLen)) #dataset = 'gtea61' num_classes = 61 mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) spatial_transform = Compose( [Scale(256), CenterCrop(224), ToTensor(), normalize]) vid_seq_test = makeDataset(dataset_dir, seqLen=seqLen, fmt='.png', train=False, spatial_transform=spatial_transform, users=['S2']) test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1, shuffle=False, num_workers=2, pin_memory=True) model = attentionDoubleResnet(num_classes=num_classes, mem_size=memSize) model.load_state_dict(torch.load(model_state_dict)) for params in model.parameters(): params.requires_grad = False model.train(False) model.cuda() test_samples = vid_seq_test.__len__() print('Number of samples = {}'.format(test_samples)) print('Evaluating...') numCorr = 0 true_labels = [] predicted_labels = [] with torch.no_grad(): #for j, (inputs, targets) in enumerate(test_loader): for inputs, inputsSN, targets in test_loader: inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda()) inputSNVariable = Variable(inputsSN.permute(1, 0, 2, 3, 4).cuda()) output_label, _ = model(inputVariable, inputSNVariable) _, predicted = torch.max(output_label.data, 1) numCorr += (predicted == targets.cuda()).sum() true_labels.append(targets) predicted_labels.append(predicted.cpu()) test_accuracy = torch.true_divide(numCorr, test_samples) * 100 test_accuracy = 'Test Accuracy = {}%'.format(test_accuracy) print(test_accuracy) fil = open(model_folder + "/test_log_acc.txt", "w") fil.write(test_accuracy) fil.close() cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float) cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis] ticks = np.linspace(0, 60, num=61) plt.figure(1, figsize=(12, 12), dpi=100.0) plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary') plt.colorbar() plt.xticks(ticks, fontsize=6) plt.yticks(ticks, fontsize=6) plt.grid(True) plt.clim(0, 1) xy = np.arange(start=0, stop=61) plt.plot(xy, xy) plt.savefig(model_folder + '/cnf_matrix_normalized.png', bbox_inches='tight') plt.show()
mean = [0.4345, 0.4051, 0.3775] std = [0.2768, 0.2713, 0.2737] no_mean_norm = False no_std_norm = False sample_size = 112 value_scale = 1 input_type = 'rgb' sample_t_stride = 1 sample_duration = 16 inference_stride = 16 #normalize = get_normalize_method(mean, std, no_mean_norm, no_std_norm) normalize = Normalize(mean, std) spatial_transform = [Resize(sample_size)] if inference_crop == 'center': spatial_transform.append(CenterCrop(sample_size)) if input_type == 'flow': spatial_transform.append(PickFirstChannels(n=2)) spatial_transform.append(ToTensor()) spatial_transform.extend([ScaleValue(value_scale), normalize]) spatial_transform = Compose(spatial_transform) temporal_transform = [] if sample_t_stride > 1: temporal_transform.append(TemporalSubsampling(sample_t_stride)) temporal_transform.append(SlidingWindow(sample_duration, inference_stride)) temporal_transform = TemporalCompose(temporal_transform) # Load the model #print('load model begin!') model = generate_model_resnet(1) # generate the ResNet model
img_prefix = '' whole_model, parameters = generate_model(args) print(whole_model) # input('...') if args.no_mean_norm and not args.std_norm: norm_method = Normalize([0, 0, 0], [1, 1, 1]) elif not args.std_norm: norm_method = Normalize(args.mean, [1, 1, 1]) else: norm_method = Normalize(args.mean, args.std) spatial_transform = Compose([ Scale(args.sample_size), CenterCrop(args.sample_size), ToTensor(args.norm_value), norm_method ]) # if not args.test_temp_crop == 'sparse': if args.compared_temp_transform == 'shuffle': temp_transform = ShuffleFrames(args.sample_duration) else: temp_transform = ReverseFrames(args.sample_duration) temp_crop_method = TemporalRandomCrop(args.sample_duration) # if args.compared_temp_transform == 'reverse': # temp_transform = Compose([ # ReverseFrames(args.sample_duration), # temp_crop_method # ]) # elif args.compared_temp_transform == 'shuffle':
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen, memSize): if dataset == 'gtea61': num_classes = 61 elif dataset == 'gtea71': num_classes = 71 elif dataset == 'gtea_gaze': num_classes = 44 elif dataset == 'egtea': num_classes = 106 mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) testBatchSize = 1 spatial_transform = Compose( [Scale(256), CenterCrop(224), ToTensor(), normalize]) vid_seq_test = makeDataset(dataset_dir, spatial_transform=spatial_transform, sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', phase='Test', seqLen=seqLen) test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=testBatchSize, shuffle=False, num_workers=2, pin_memory=True) model = twoStreamAttentionModel(stackSize=5, memSize=512, num_classes=num_classes) model.load_state_dict(torch.load(model_state_dict)) for params in model.parameters(): params.requires_grad = False classes = sorted( os.listdir( "/content/drive/My Drive/testingGithub/FPAR_project/GTEA61/processed_frames2/train/S1" ))[1:] print(classes) print(len(classes)) model.train(False) model.cuda() test_samples = vid_seq_test.__len__() print('Number of samples = {}'.format(test_samples)) print('Evaluating...') numCorrTwoStream = 0 predicted_labels = [] true_labels = [] with torch.no_grad(): test_preds = get_all_preds(model, test_loader) labels = vid_seq_test.labels predictions = test_preds.argmax(dim=1) cm = confusion_matrix(labels, predictions) plt.figure(figsize=(25, 25)) plot_confusion_matrix(cm, classes)
def get_train_utils(opt, model_parameters): assert opt.train_crop in ['random', 'corner', 'center'] spatial_transform = [] if opt.train_crop == 'random': spatial_transform.append( RandomResizedCrop( opt.sample_size, (opt.train_crop_min_scale, 1.0), (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio))) elif opt.train_crop == 'corner': scales = [1.0] scale_step = 1 / (2**(1 / 4)) for _ in range(1, 5): scales.append(scales[-1] * scale_step) spatial_transform.append(MultiScaleCornerCrop(opt.sample_size, scales)) elif opt.train_crop == 'center': spatial_transform.append(Resize(opt.sample_size)) spatial_transform.append(CenterCrop(opt.sample_size)) normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm, opt.no_std_norm) if not opt.no_hflip: spatial_transform.append(RandomHorizontalFlip()) if opt.colorjitter: spatial_transform.append(ColorJitter()) spatial_transform.append(ToTensor()) if opt.input_type == 'flow': spatial_transform.append(PickFirstChannels(n=2)) spatial_transform.append(ScaleValue(opt.value_scale)) spatial_transform.append(normalize) spatial_transform = Compose(spatial_transform) assert opt.train_t_crop in ['random', 'center'] temporal_transform = [] if opt.sample_t_stride > 1: temporal_transform.append(TemporalSubsampling(opt.sample_t_stride)) if opt.train_t_crop == 'random': temporal_transform.append(TemporalRandomCrop(opt.sample_duration)) elif opt.train_t_crop == 'center': temporal_transform.append(TemporalCenterCrop(opt.sample_duration)) temporal_transform = TemporalCompose(temporal_transform) train_data = get_training_data(opt.video_path, opt.annotation_path, opt.dataset, opt.input_type, opt.file_type, spatial_transform, temporal_transform) if opt.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler( train_data) else: train_sampler = None train_loader = torch.utils.data.DataLoader(train_data, batch_size=opt.batch_size, shuffle=(train_sampler is None), num_workers=opt.n_threads, pin_memory=True, sampler=train_sampler, worker_init_fn=worker_init_fn) if opt.is_master_node: train_logger = Logger(opt.result_path / 'train.log', ['epoch', 'loss', 'acc', 'lr']) train_batch_logger = Logger( opt.result_path / 'train_batch.log', ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr']) else: train_logger = None train_batch_logger = None if opt.nesterov: dampening = 0 else: dampening = opt.dampening optimizer = SGD(model_parameters, lr=opt.learning_rate, momentum=opt.momentum, dampening=dampening, weight_decay=opt.weight_decay, nesterov=opt.nesterov) assert opt.lr_scheduler in ['plateau', 'multistep'] assert not (opt.lr_scheduler == 'plateau' and opt.no_val) if opt.lr_scheduler == 'plateau': scheduler = lr_scheduler.ReduceLROnPlateau( optimizer, 'min', patience=opt.plateau_patience) else: scheduler = lr_scheduler.MultiStepLR(optimizer, opt.multistep_milestones) return (train_loader, train_sampler, train_logger, train_batch_logger, optimizer, scheduler)
epochs = args.epochs N_blocks = args.n_blocks d_model = args.d_model att_heads = args.att_heads lr = args.learning_rate sample_duration = args.sample_duration sample_size = args.sample_size num_workers = args.workers frames_path = args.frames_path output_path = args.output_path model_path = args.model_path mean = [114.7748, 107.7354, 99.4750] spatial_transform = Compose([Scale(sample_size), CenterCrop(sample_size), ToTensor(), Normalize(mean, [1, 1, 1])]) temporal_transform = LoopPadding(sample_duration) data_train = Video(os.path.join(frames_path, "train"), "S2T/3D/data/annotations/train.csv", spatial_transform=spatial_transform, temporal_transform=temporal_transform, sample_duration=sample_duration) trg_vocab = len(data_train.dictionary.idx2word) train_loader = torch.utils.data.DataLoader( data_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
os.makedirs(os.path.join(opt.save_path, vid)) cmd = 'cp -r "{}" "{}"'.format(image_path, target_image_path) subprocess.call(cmd, shell=True) print(cmd) # print(vid) if __name__ == '__main__': opt = opts.parse_opts() opt.sample_size = 112 spatial_transform = Compose([Scale(opt.sample_size), CenterCrop(opt.sample_size)]) loader = get_default_video_loader() base_dir = "/userhome/dataset/MSVD/Video-Description-with-Spatial-Temporal-Attention/youtube-frames/*" videos_dir = glob.glob(base_dir) opt.save_path = "/userhome/dataset/MSVD/Video-Description-with-Spatial-Temporal-Attention/28frames-msvd/" if not os.path.exists(opt.save_path): os.makedirs(opt.save_path) # for video_path in videos_dir: # Video(video_path) pool = ThreadPool(8) # create a thread pool and process the videos concurrently pool.map(Video, videos_dir) # pool.map is used like the built-in map
def main_run(dataset, flowModel_state_dict, RGBModel_state_dict, dataset_dir, stackSize, seqLen, memSize, numSeg): if dataset == 'gtea61': num_classes = 61 elif dataset == 'gtea71': num_classes = 71 elif dataset == 'gtea_gaze': num_classes = 44 elif dataset == 'egtea': num_classes = 106 mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) flow_wt = 0.5 testBatchSize = 1 sequence = True spatial_transform = Compose( [Scale(256), CenterCrop(224), ToTensor(), normalize]) vid_seq_test = makeDataset(dataset_dir, spatial_transform=spatial_transform, sequence=sequence, numSeg=numSeg, stackSize=stackSize, fmt='.jpg', phase='Test', seqLen=seqLen) test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=testBatchSize, shuffle=False, num_workers=2, pin_memory=True) modelFlow = flow_resnet34(False, channels=2 * stackSize, num_classes=num_classes) modelFlow.load_state_dict(torch.load(flowModel_state_dict)) modelRGBSN = attentionMDoubleResnet(num_classes=num_classes, mem_size=memSize) modelRGBSN.load_state_dict(torch.load(RGBModel_state_dict)) for params in modelFlow.parameters(): params.requires_grad = False for params in modelRGBSN.parameters(): params.requires_grad = False modelFlow.train(False) modelRGBSN.train(False) modelFlow.cuda() modelRGBSN.cuda() test_samples = vid_seq_test.__len__() print('Number of samples = {}'.format(test_samples)) print('Evaluating...') numCorrTwoStream = 0 true_labels = [] predicted_labels = [] for j, (inputFlow, inputFrame, inputSN, targets) in enumerate(test_loader): inputVariableFlow = Variable(inputFlow[0].cuda(), volatile=True) inputVariableFrame = Variable(inputFrame.permute(1, 0, 2, 3, 4).cuda(), volatile=True) inputVariableSN = Variable(inputSN.permute(1, 0, 2, 3, 4).cuda(), volatile=True) output_labelFlow, _ = modelFlow(inputVariableFlow) output_labelFrameSN, _ = modelRGBSN(inputVariableFrame, inputVariableSN) output_label_meanFlow = torch.mean(output_labelFlow.data, 0, True) output_label_meanTwoStream = (flow_wt * output_label_meanFlow) + ( (1 - flow_wt) * output_labelFrameSN.data) _, predictedTwoStream = torch.max(output_label_meanTwoStream, 1) numCorrTwoStream += (predictedTwoStream == targets[0]).sum() true_labels.append(targets) predicted_labels.append(predictedTwoStream.cpu()) test_accuracyTwoStream = (numCorrTwoStream / test_samples) * 100 print('Test Accuracy = {}'.format(test_accuracyTwoStream)) cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float) cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis] ticks = np.linspace(0, 60, num=61) plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary') plt.colorbar() plt.xticks(ticks, fontsize=6) plt.yticks(ticks, fontsize=6) plt.grid(True) plt.clim(0, 1) plt.savefig(dataset + '-twoStreamDoubleResnet.jpg', bbox_inches='tight') plt.show()