def main_run(dataset, flowModel, rgbModel, stackSize, seqLen, memSize, trainDatasetDir, valDatasetDir, outDir,
             trainBatchSize, valBatchSize, lr1, numEpochs, decayRate, stepSize):
    """Fine-tune the joint two-stream (optical flow + RGB) attention model.

    All parameters are frozen first. The classifier, the frame model's LSTM
    cell, conv1/conv2 of ``layer4[0..2]`` and ``fc`` of the frame ResNet, and
    ``layer4`` of the flow model are then unfrozen and optimised with SGD
    under a StepLR schedule. Losses and accuracies go to TensorBoard and to
    four text logs inside ``<outDir>/<dataset>/twoStream``.

    Args:
        dataset: One of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'.
        flowModel: Forwarded to ``twoStreamAttentionModel(flowModel=...)``.
        rgbModel: Forwarded to ``twoStreamAttentionModel(frameModel=...)``.
        stackSize: Forwarded to the dataset and the model.
        seqLen: Forwarded to the dataset.
        memSize: Forwarded to the model.
        trainDatasetDir: Root directory of the training split.
        valDatasetDir: Root directory of the validation split, or None to
            skip validation.
        outDir: Parent directory of the run folder.
        trainBatchSize: Batch size of the training loader.
        valBatchSize: Batch size of the validation loader.
        lr1: Learning rate of the main parameter group (the flow ``layer4``
            group uses a fixed 1e-4).
        numEpochs: Number of training epochs.
        decayRate: ``gamma`` of the StepLR scheduler.
        stepSize: ``step_size`` of the StepLR scheduler.

    Raises:
        SystemExit: If ``dataset`` is unknown or the run folder already exists.
    """
    class_counts = {'gtea61': 61, 'gtea71': 71, 'gtea_gaze': 44, 'egtea': 106}
    if dataset not in class_counts:
        print('Dataset not found')
        sys.exit()
    num_classes = class_counts[dataset]

    DEVICE = "cuda"

    # Directory for saved models and log files; refuse to overwrite a previous run.
    model_folder = os.path.join('./', outDir, dataset, 'twoStream')
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Data loaders
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(),
                                 MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])
    vid_seq_train = makeDataset2Stream(trainDatasetDir, spatial_transform=spatial_transform,
                                       sequence=False, numSeg=1, stackSize=stackSize,
                                       fmt='.png', seqLen=seqLen)
    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                                               shuffle=True, num_workers=4, pin_memory=True)
    trainSamples = len(vid_seq_train)

    if valDatasetDir is not None:
        vid_seq_val = makeDataset2Stream(valDatasetDir,
                                         spatial_transform=Compose([Scale(256), CenterCrop(224),
                                                                    ToTensor(), normalize]),
                                         sequence=False, numSeg=1, stackSize=stackSize,
                                         fmt='.png', phase='Test', seqLen=seqLen)
        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                                 shuffle=False, num_workers=2, pin_memory=True)
        valSamples = len(vid_seq_val)

    # Model: freeze everything, then unfreeze the sub-modules that are fine-tuned.
    model = twoStreamAttentionModel(flowModel=flowModel, frameModel=rgbModel,
                                    stackSize=stackSize, memSize=memSize, num_classes=num_classes)
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)

    resnet = model.frameModel.resNet
    rgb_modules = [
        model.classifier,
        model.frameModel.lstm_cell,
        resnet.layer4[0].conv1, resnet.layer4[0].conv2,
        resnet.layer4[1].conv1, resnet.layer4[1].conv2,
        resnet.layer4[2].conv1, resnet.layer4[2].conv2,
        resnet.fc,
    ]
    train_params = []
    for module in rgb_modules:
        for params in module.parameters():
            params.requires_grad = True
            train_params.append(params)

    base_params = []
    for params in model.flowModel.layer4.parameters():
        params.requires_grad = True
        base_params.append(params)

    model = model.to(DEVICE)

    # CrossEntropyLoss already applies log-softmax, so raw logits are passed
    # (log-softmax is idempotent, hence the loss value is unchanged).
    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.SGD([
        {'params': train_params},
        {'params': base_params, 'lr': 1e-4},
    ], lr=lr1, momentum=0.9, weight_decay=5e-4)
    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn, step_size=stepSize, gamma=decayRate)

    best_accuracy = 0
    writer = SummaryWriter(model_folder)
    try:
        # The with-block guarantees the logs are closed even if training fails.
        with open(os.path.join(model_folder, 'train_log_loss.txt'), 'w') as train_log_loss, \
                open(os.path.join(model_folder, 'train_log_acc.txt'), 'w') as train_log_acc, \
                open(os.path.join(model_folder, 'val_log_loss.txt'), 'w') as val_log_loss, \
                open(os.path.join(model_folder, 'val_log_acc.txt'), 'w') as val_log_acc:
            for epoch in range(numEpochs):
                epoch_loss = 0
                numCorrTrain = 0
                iterPerEpoch = 0
                # Validation switches the whole model to eval mode; re-enable
                # train mode on the same sub-modules as before each epoch.
                model.classifier.train(True)
                model.flowModel.layer4.train(True)
                for inputFlow, inputFrame, targets in train_loader:
                    iterPerEpoch += 1
                    optimizer_fn.zero_grad()
                    inputVariableFlow = inputFlow.to(DEVICE)
                    inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
                    labelVariable = targets.to(DEVICE)
                    output_label = model(inputVariableFlow, inputVariableFrame)
                    loss = loss_fn(output_label, labelVariable)
                    loss.backward()
                    optimizer_fn.step()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorrTrain += (predicted == labelVariable).sum().item()
                    epoch_loss += loss.item()
                avg_loss = epoch_loss / iterPerEpoch
                trainAccuracy = 100.0 * numCorrTrain / trainSamples
                print('Average training loss after {} epoch = {} '.format(epoch + 1, avg_loss))
                print('Training accuracy after {} epoch = {}% '.format(epoch + 1, trainAccuracy))
                writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
                writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
                train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch + 1, avg_loss))
                train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch + 1, trainAccuracy))
                optim_scheduler.step()

                if valDatasetDir is not None:
                    model.train(False)
                    val_loss_epoch = 0
                    val_iter = 0
                    numCorr = 0
                    with torch.no_grad():
                        for inputFlow, inputFrame, targets in val_loader:
                            val_iter += 1
                            inputVariableFlow = inputFlow.to(DEVICE)
                            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
                            labelVariable = targets.to(DEVICE)
                            output_label = model(inputVariableFlow, inputVariableFrame)
                            loss = loss_fn(output_label, labelVariable)
                            val_loss_epoch += loss.item()
                            _, predicted = torch.max(output_label.data, 1)
                            numCorr += (predicted == labelVariable).sum().item()
                    val_accuracy = 100.0 * numCorr / valSamples
                    avg_val_loss = val_loss_epoch / val_iter
                    print('Val Loss after {} epochs, loss = {}'.format(epoch + 1, avg_val_loss))
                    print('Val Accuracy after {} epochs = {}%'.format(epoch + 1, val_accuracy))
                    writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                    writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                    val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                    val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                    # Keep only the checkpoint with the best validation accuracy.
                    if val_accuracy > best_accuracy:
                        save_path_model = (model_folder + '/model_twoStream_state_dict.pth')
                        torch.save(model.state_dict(), save_path_model)
                        best_accuracy = val_accuracy
                elif (epoch + 1) % 10 == 0:
                    # No validation set: fall back to a checkpoint every 10 epochs.
                    save_path_model = (model_folder + '/model_twoStream_state_dict_epoch' + str(epoch + 1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)
    finally:
        writer.flush()
        writer.close()
from torch.utils.data import Dataset from PIL import Image import numpy as np import random import glob import sys from ML_DL_Project.Scripts.spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize, MultiScaleCornerCrop, RandomHorizontalFlip) mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) spatial_transform2 = Compose([Scale((7, 7)), ToTensor()]) def gen_split(root_dir, stackSize): DatasetX = [] DatasetY = [] DatasetF = [] Labels = [] NumFrames = [] root_dir = os.path.join(root_dir, 'flow_x_processed') for dir_user in sorted(os.listdir(root_dir)): if not dir_user.startswith('.') and dir_user: class_id = 0 directory = os.path.join(root_dir, dir_user) action = sorted(os.listdir(directory))
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize,
             valBatchSize, numEpochs, lr1, decayRate, stepSize, memSize, attention):
    """Train the RGB recurrent model (with or without attention) in two stages.

    Stage 1 trains only the LSTM cell and the classifier of a freshly built
    model. Any other stage loads the weights in ``stage1_dict`` and also
    fine-tunes conv1/conv2 of ``resNet.layer4[0..2]`` and ``resNet.fc``.
    Adam with a MultiStepLR schedule is used. Metrics are written to
    TensorBoard and to four text logs in the run folder.

    Args:
        dataset: One of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'.
        stage: Training stage; 1 selects stage-1 behaviour, anything else the
            fine-tuning setup.
        train_data_dir: Root directory of the training split.
        val_data_dir: Root directory of the validation split, or None to
            skip validation.
        stage1_dict: Path of the stage-1 state dict (read when stage != 1).
        out_dir: Parent directory of the run folder.
        seqLen: Forwarded to the datasets.
        trainBatchSize: Batch size of the training loader.
        valBatchSize: Batch size of the validation loader.
        numEpochs: Number of training epochs.
        lr1: Learning rate for Adam.
        decayRate: ``gamma`` of the MultiStepLR scheduler.
        stepSize: ``milestones`` of the MultiStepLR scheduler.
        memSize: Forwarded to the model as ``mem_size``.
        attention: When equal to True, ``attentionModel`` is used and the run
            folder is ``rgb``; otherwise ``clstm_Model`` and
            ``rgb_noAttention``.

    Raises:
        SystemExit: If ``dataset`` is unknown or the run folder already exists.
    """
    class_counts = {'gtea61': 61, 'gtea71': 71, 'gtea_gaze': 44, 'egtea': 106}
    if dataset not in class_counts:
        print('Dataset not found')
        sys.exit()
    num_classes = class_counts[dataset]

    DEVICE = "cuda"
    # Keep the original equality semantics for non-bool callers.
    use_attention = (attention == True)  # noqa: E712

    # Directory for saved models and log files; refuse to overwrite a previous run.
    variant = 'rgb' if use_attention else 'rgb_noAttention'
    model_folder = os.path.join('./', out_dir, dataset, variant, 'stage' + str(stage))
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Data loaders
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(),
                                 MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])
    vid_seq_train = makeDataset(train_data_dir, spatial_transform=spatial_transform,
                                seqLen=seqLen, fmt='.png')
    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                                               shuffle=True, num_workers=4, pin_memory=True)
    if val_data_dir is not None:
        vid_seq_val = makeDataset(val_data_dir,
                                  spatial_transform=Compose([Scale(256), CenterCrop(224),
                                                             ToTensor(), normalize]),
                                  seqLen=seqLen, fmt='.png')
        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                                 shuffle=False, num_workers=2, pin_memory=True)

    # Model: start fully frozen, then unfreeze what the current stage trains.
    if use_attention:
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
    else:
        model = clstm_Model(num_classes=num_classes, mem_size=memSize)
    if stage != 1:
        model.load_state_dict(torch.load(stage1_dict))
    model.train(False)
    for params in model.parameters():
        params.requires_grad = False

    train_params = []
    if stage != 1:
        finetuned_layers = [
            model.resNet.layer4[0].conv1, model.resNet.layer4[0].conv2,
            model.resNet.layer4[1].conv1, model.resNet.layer4[1].conv2,
            model.resNet.layer4[2].conv1, model.resNet.layer4[2].conv2,
            model.resNet.fc,
        ]
        for layer in finetuned_layers:
            for params in layer.parameters():
                params.requires_grad = True
                train_params.append(params)
        for layer in finetuned_layers:
            layer.train(True)

    for module in (model.lstm_cell, model.classifier):
        for params in module.parameters():
            params.requires_grad = True
            train_params.append(params)
    model.lstm_cell.train(True)
    model.classifier.train(True)
    model = model.to(DEVICE)

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=stepSize,
                                                           gamma=decayRate)

    best_accuracy = 0
    writer = SummaryWriter(model_folder)
    try:
        # The with-block guarantees the logs are closed even if training fails.
        with open(os.path.join(model_folder, 'train_log_loss.txt'), 'w') as train_log_loss, \
                open(os.path.join(model_folder, 'train_log_acc.txt'), 'w') as train_log_acc, \
                open(os.path.join(model_folder, 'val_log_loss.txt'), 'w') as val_log_loss, \
                open(os.path.join(model_folder, 'val_log_acc.txt'), 'w') as val_log_acc:
            for epoch in range(numEpochs):
                epoch_loss = 0
                numCorrTrain = 0
                trainSamples = 0
                iterPerEpoch = 0
                # Validation puts the whole model in eval mode; restore train
                # mode on the trained sub-modules at the start of each epoch.
                model.lstm_cell.train(True)
                model.classifier.train(True)
                writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
                if stage == 2:
                    model.resNet.layer4[0].conv1.train(True)
                    model.resNet.layer4[0].conv2.train(True)
                    model.resNet.layer4[1].conv1.train(True)
                    model.resNet.layer4[1].conv2.train(True)
                    model.resNet.layer4[2].conv1.train(True)
                    model.resNet.layer4[2].conv2.train(True)
                    model.resNet.fc.train(True)
                for inputs, targets in train_loader:
                    iterPerEpoch += 1
                    optimizer_fn.zero_grad()
                    inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                    labelVariable = targets.to(DEVICE)
                    trainSamples += inputs.size(0)
                    output_label, _ = model(inputVariable)
                    loss = loss_fn(output_label, labelVariable)
                    loss.backward()
                    optimizer_fn.step()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorrTrain += (predicted == labelVariable).sum().item()
                    # NOTE(review): the loss is averaged per batch, not per
                    # sample; weight by inputs.size(0) if that is preferred.
                    epoch_loss += loss.item()
                avg_loss = epoch_loss / iterPerEpoch
                trainAccuracy = 100.0 * numCorrTrain / trainSamples
                optim_scheduler.step()

                print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_loss, trainAccuracy))
                writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
                writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
                train_log_loss.write('train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss))
                train_log_acc.write('train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy))

                if val_data_dir is not None:
                    model.train(False)
                    val_loss_epoch = 0
                    val_iter = 0
                    val_samples = 0
                    numCorr = 0
                    with torch.no_grad():
                        for inputs, targets in val_loader:
                            val_iter += 1
                            val_samples += inputs.size(0)
                            inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                            labelVariable = targets.to(DEVICE)
                            output_label, _ = model(inputVariable)
                            val_loss = loss_fn(output_label, labelVariable)
                            val_loss_epoch += val_loss.item()
                            _, predicted = torch.max(output_label.data, 1)
                            numCorr += (predicted == labelVariable).sum().item()
                    val_accuracy = 100.0 * numCorr / val_samples
                    avg_val_loss = val_loss_epoch / val_iter
                    print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
                    writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                    writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                    val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                    val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                    # Keep only the checkpoint with the best validation accuracy.
                    if val_accuracy > best_accuracy:
                        save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                        torch.save(model.state_dict(), save_path_model)
                        best_accuracy = val_accuracy
                elif (epoch + 1) % 10 == 0:
                    # No validation set: fall back to a checkpoint every 10 epochs.
                    save_path_model = (model_folder + '/model_rgb_state_dict_epoch' + str(epoch + 1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)
    finally:
        writer.flush()
        writer.close()
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen):
    """Evaluate a trained ``flow_resnet34`` model and plot its confusion matrix.

    The model is built with ``channels=1 * seqLen``, loaded from
    ``model_state_dict`` and run over the test set one sample at a time. The
    test accuracy is printed and the row-normalised confusion matrix is saved
    as ``<dataset>-idt.jpg`` and shown.

    Args:
        dataset: One of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'.
        model_state_dict: Path of the state dict to load.
        dataset_dir: Root directory of the test split.
        stackSize: Forwarded to the dataset.
        seqLen: Forwarded to the dataset; also sets the model input channels.

    Raises:
        SystemExit: If ``dataset`` is unknown.
    """
    class_counts = {'gtea61': 61, 'gtea71': 71, 'gtea_gaze': 44, 'egtea': 106}
    if dataset not in class_counts:
        print("dataset not found")
        sys.exit()
    num_classes = class_counts[dataset]

    DEVICE = "cuda"

    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir, spatial_transform=spatial_transform,
                               seqLen=seqLen, stackSize=stackSize, fmt='.png')
    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                                              shuffle=False, num_workers=2, pin_memory=True)
    actions = vid_seq_test.__getLabel__()

    model = flow_resnet34(False, channels=1 * seqLen, num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)
    model.to(DEVICE)

    test_samples = len(vid_seq_test)
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')

    numCorr = 0
    true_labels = []
    predicted_labels = []
    with torch.no_grad():
        for inputs, targets in test_loader:
            # NOTE(review): the whole batch is fed to the model (an earlier
            # version used inputs[0]); confirm this suits the IDT input.
            inputVariable = inputs.to(DEVICE)
            output_label, _ = model(inputVariable)
            output_label_mean = torch.mean(output_label.data, 0, True)
            _, predicted = torch.max(output_label_mean, 1)
            # Compare on the CPU: the targets never leave the host.
            predicted = predicted.cpu()
            numCorr += (predicted == targets[0]).sum().item()
            true_labels.extend(targets.reshape(-1).tolist())
            predicted_labels.extend(predicted.reshape(-1).tolist())

    # numCorr is a Python int, so this is a true (floating point) division.
    test_accuracy = 100.0 * numCorr / test_samples
    print('Test Accuracy = {}%'.format(test_accuracy))

    # Pin the label set so that matrix indices always line up with the ticks.
    cnf_matrix = confusion_matrix(true_labels, predicted_labels,
                                  labels=np.arange(num_classes)).astype(float)
    row_sums = cnf_matrix.sum(axis=1)[:, np.newaxis]
    row_sums[row_sums == 0] = 1.0  # classes without samples stay all-zero
    cnf_matrix_normalized = cnf_matrix / row_sums

    # Keep the labels in a list: a set would lose the class order.
    ticks = [str(action + str(i)) for i, action in enumerate(actions)]
    plt.figure(figsize=(20, 20))
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='Blues')
    plt.colorbar()
    plt.xticks(np.arange(num_classes), labels=ticks, fontsize=10, rotation=90)
    plt.yticks(np.arange(num_classes), labels=ticks, fontsize=10)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-idt.jpg', bbox_inches='tight')
    plt.show()
def main_run(dataset, model_state_dict, dataset_dir, seqLen, memSize, attention):
    """Evaluate a trained RGB recurrent model and plot its confusion matrix.

    Builds ``attentionModel`` when ``attention`` equals True and
    ``clstm_Model`` otherwise, loads ``model_state_dict`` and runs the test
    set one sample at a time. The accuracy and the label lists are printed.
    The row-normalised confusion matrix is saved as ``<dataset>-rgb.jpg`` and
    shown.

    Args:
        dataset: One of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'.
        model_state_dict: Path of the state dict to load.
        dataset_dir: Root directory of the test split.
        seqLen: Forwarded to the dataset.
        memSize: Forwarded to the model as ``mem_size``.
        attention: Selects the model class (see above).

    Raises:
        SystemExit: If ``dataset`` is unknown.
    """
    class_counts = {'gtea61': 61, 'gtea71': 71, 'gtea_gaze': 44, 'egtea': 106}
    if dataset not in class_counts:
        print('Dataset not found')
        sys.exit()
    num_classes = class_counts[dataset]

    DEVICE = "cuda"

    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir, spatial_transform=spatial_transform,
                               seqLen=seqLen, fmt='.png')
    actions = vid_seq_test.__getLabel__()
    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                                              shuffle=False, num_workers=2, pin_memory=True)

    # Keep the original equality semantics for non-bool callers.
    if attention == True:  # noqa: E712
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
    else:
        model = clstm_Model(num_classes=num_classes, mem_size=memSize)
    model.load_state_dict(torch.load(model_state_dict))
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)
    model.to(DEVICE)

    test_samples = len(vid_seq_test)
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')

    numCorr = 0
    true_labels = []
    predicted_labels = []
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
            output_label, _ = model(inputVariable)
            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.to(DEVICE)).sum().item()
            # Plain Python ints: scikit-learn works on host data.
            true_labels.extend(targets.reshape(-1).tolist())
            predicted_labels.extend(predicted.cpu().reshape(-1).tolist())

    # numCorr is a Python int, so this is a true (floating point) division.
    test_accuracy = 100.0 * numCorr / test_samples
    print('Test Accuracy = {}%'.format(test_accuracy))

    # Debug output
    print(true_labels)
    print(predicted_labels)

    # Pin the label set so that matrix indices always line up with the ticks.
    cnf_matrix = confusion_matrix(true_labels, predicted_labels,
                                  labels=np.arange(num_classes)).astype(float)
    row_sums = cnf_matrix.sum(axis=1)[:, np.newaxis]
    row_sums[row_sums == 0] = 1.0  # classes without samples stay all-zero
    cnf_matrix_normalized = cnf_matrix / row_sums

    # Keep the labels in a list: a set would lose the class order.
    ticks = [str(action + str(i)) for i, action in enumerate(actions)]
    plt.figure(figsize=(20, 20))
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(np.arange(num_classes), labels=ticks, fontsize=10, rotation=90)
    plt.yticks(np.arange(num_classes), labels=ticks, fontsize=10)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-rgb.jpg', bbox_inches='tight')
    plt.show()
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen, memSize):
    """Evaluate a trained two-stream attention model and plot its confusion matrix.

    The model is built from the ``stackSize`` and ``memSize`` arguments,
    loaded from ``model_state_dict`` and run over the test set one sample at
    a time. The accuracy is printed and the row-normalised confusion matrix
    is saved as ``<dataset>-twoStreamJoint.jpg`` and shown.

    Args:
        dataset: One of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'.
        model_state_dict: Path of the state dict to load.
        dataset_dir: Root directory of the test split.
        stackSize: Forwarded to the dataset and the model.
        seqLen: Forwarded to the dataset.
        memSize: Forwarded to the model.

    Raises:
        SystemExit: If ``dataset`` is unknown.
    """
    class_counts = {'gtea61': 61, 'gtea71': 71, 'gtea_gaze': 44, 'egtea': 106}
    if dataset not in class_counts:
        print('Dataset not found')
        sys.exit()
    num_classes = class_counts[dataset]

    DEVICE = "cuda"
    testBatchSize = 1

    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset2Stream(dataset_dir, spatial_transform=spatial_transform,
                                      sequence=False, numSeg=1, stackSize=stackSize,
                                      fmt='.png', phase='Test', seqLen=seqLen)
    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=testBatchSize,
                                              shuffle=False, num_workers=2, pin_memory=True)
    actions = vid_seq_test.__getLabel__()

    # Use the caller's sizes (previously hard-coded to 5 and 512) so the
    # model always matches the dataset configuration.
    model = twoStreamAttentionModel(stackSize=stackSize, memSize=memSize, num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)
    model.to(DEVICE)

    test_samples = len(vid_seq_test)
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')

    numCorrTwoStream = 0
    predicted_labels = []
    true_labels = []
    with torch.no_grad():
        for inputFlow, inputFrame, targets in test_loader:
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
            inputVariableFlow = inputFlow.to(DEVICE)
            output_label = model(inputVariableFlow, inputVariableFrame)
            _, predictedTwoStream = torch.max(output_label.data, 1)
            numCorrTwoStream += (predictedTwoStream == targets.to(DEVICE)).sum().item()
            # Plain Python ints: scikit-learn works on host data.
            predicted_labels.extend(predictedTwoStream.cpu().reshape(-1).tolist())
            true_labels.extend(targets.reshape(-1).tolist())

    # numCorrTwoStream is a Python int, so this is a true division.
    test_accuracyTwoStream = 100.0 * numCorrTwoStream / test_samples

    # Pin the label set so that matrix indices always line up with the ticks.
    cnf_matrix = confusion_matrix(true_labels, predicted_labels,
                                  labels=np.arange(num_classes)).astype(float)
    row_sums = cnf_matrix.sum(axis=1)[:, np.newaxis]
    row_sums[row_sums == 0] = 1.0  # classes without samples stay all-zero
    cnf_matrix_normalized = cnf_matrix / row_sums

    print('Accuracy {:.02f}%'.format(test_accuracyTwoStream))

    # Keep the labels in a list: a set would lose the class order.
    ticks = [str(action + str(i)) for i, action in enumerate(actions)]
    plt.figure(figsize=(20, 20))
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='cool')
    plt.colorbar()
    plt.xticks(np.arange(num_classes), labels=ticks, fontsize=6, rotation=90)
    plt.yticks(np.arange(num_classes), labels=ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-twoStreamJoint.jpg', bbox_inches='tight')
    plt.show()
def main_run(dataset, train_data_dir, val_data_dir, out_dir, stackSize, trainBatchSize, valBatchSize,
             numEpochs, lr1, decayRate, stepSize):
    """Train the optical-flow ``flow_resnet34`` network.

    All model parameters are optimised with SGD under a MultiStepLR schedule.
    Metrics are written to TensorBoard and to four text logs inside
    ``<out_dir>/<dataset>/flow``.

    Args:
        dataset: One of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'.
        train_data_dir: Root directory of the training split.
        val_data_dir: Root directory of the validation split, or None to
            skip validation.
        out_dir: Parent directory of the run folder.
        stackSize: Forwarded to the datasets; the model takes
            ``2 * stackSize`` input channels.
        trainBatchSize: Batch size of the training loader.
        valBatchSize: Batch size of the validation loader.
        numEpochs: Number of training epochs.
        lr1: Learning rate for SGD.
        decayRate: ``gamma`` of the MultiStepLR scheduler.
        stepSize: ``milestones`` of the MultiStepLR scheduler.

    Raises:
        SystemExit: If ``dataset`` is unknown or the run folder already exists.
    """
    class_counts = {'gtea61': 61, 'gtea71': 71, 'gtea_gaze': 44, 'egtea': 106}
    if dataset not in class_counts:
        print('Dataset not found')
        sys.exit()
    num_classes = class_counts[dataset]

    DEVICE = "cuda"

    # Directory for saved models and log files; refuse to overwrite a previous run.
    model_folder = os.path.join('./', out_dir, dataset, 'flow')
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Data loaders
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(),
                                 MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])
    vid_seq_train = makeDatasetFlow(train_data_dir, spatial_transform=spatial_transform,
                                    sequence=False, stackSize=stackSize, fmt='.png')
    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                                               shuffle=True, sampler=None, num_workers=4,
                                               pin_memory=True)
    # Default to 0 so the summary below also works without a validation set.
    valInstances = 0
    if val_data_dir is not None:
        vid_seq_val = makeDatasetFlow(val_data_dir,
                                      spatial_transform=Compose([Scale(256), CenterCrop(224),
                                                                 ToTensor(), normalize]),
                                      sequence=False, stackSize=stackSize, fmt='.png', phase='Test')
        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                                 shuffle=False, num_workers=2, pin_memory=True)
        valInstances = len(vid_seq_val)
    trainInstances = len(vid_seq_train)
    print('Number of samples in the dataset: training = {} | validation = {}'.format(
        trainInstances, valInstances))

    model = flow_resnet34(True, channels=2 * stackSize, num_classes=num_classes)
    model.train(True)
    train_params = list(model.parameters())
    model.to(DEVICE)

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.SGD(train_params, lr=lr1, momentum=0.9, weight_decay=5e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=stepSize,
                                                           gamma=decayRate)

    best_accuracy = 0
    writer = SummaryWriter(model_folder)
    try:
        # The with-block guarantees the logs are closed even if training fails.
        with open(os.path.join(model_folder, 'train_log_loss.txt'), 'w') as train_log_loss, \
                open(os.path.join(model_folder, 'train_log_acc.txt'), 'w') as train_log_acc, \
                open(os.path.join(model_folder, 'val_log_loss.txt'), 'w') as val_log_loss, \
                open(os.path.join(model_folder, 'val_log_acc.txt'), 'w') as val_log_acc:
            for epoch in range(numEpochs):
                epoch_loss = 0
                numCorrTrain = 0
                trainSamples = 0
                iterPerEpoch = 0
                # Validation switches the model to eval mode; switch it back.
                model.train(True)
                writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
                for inputs, targets in train_loader:
                    iterPerEpoch += 1
                    optimizer_fn.zero_grad()
                    inputVariable = inputs.to(DEVICE)
                    labelVariable = targets.to(DEVICE)
                    trainSamples += inputs.size(0)
                    output_label, _ = model(inputVariable)
                    loss = loss_fn(output_label, labelVariable)
                    loss.backward()
                    optimizer_fn.step()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorrTrain += (predicted == labelVariable).sum().item()
                    epoch_loss += loss.item()
                avg_loss = epoch_loss / iterPerEpoch
                trainAccuracy = 100.0 * numCorrTrain / trainSamples
                print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
                    epoch + 1, avg_loss, trainAccuracy))
                writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
                writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
                train_log_loss.write('Training loss after {} epoch = {}\n'.format(
                    epoch + 1, avg_loss))
                train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
                    epoch + 1, trainAccuracy))
                optim_scheduler.step()

                if val_data_dir is not None:
                    model.train(False)
                    val_loss_epoch = 0
                    val_iter = 0
                    val_samples = 0
                    numCorr = 0
                    # no_grad replaces the deprecated volatile=True Variables.
                    with torch.no_grad():
                        for inputs, targets in val_loader:
                            val_iter += 1
                            val_samples += inputs.size(0)
                            inputVariable = inputs.to(DEVICE)
                            labelVariable = targets.to(DEVICE, non_blocking=True)
                            output_label, _ = model(inputVariable)
                            val_loss = loss_fn(output_label, labelVariable)
                            val_loss_epoch += val_loss.item()
                            _, predicted = torch.max(output_label.data, 1)
                            numCorr += (predicted == labelVariable).sum().item()
                    val_accuracy = 100.0 * numCorr / val_samples
                    avg_val_loss = val_loss_epoch / val_iter
                    print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format(
                        epoch + 1, avg_val_loss, val_accuracy))
                    writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                    writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                    val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                        epoch + 1, avg_val_loss))
                    val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                        epoch + 1, val_accuracy))
                    # Keep only the checkpoint with the best validation accuracy.
                    if val_accuracy > best_accuracy:
                        save_path_model = (model_folder + '/model_flow_state_dict.pth')
                        torch.save(model.state_dict(), save_path_model)
                        best_accuracy = val_accuracy
                elif (epoch + 1) % 10 == 0:
                    # No validation set: fall back to a checkpoint every 10 epochs.
                    save_path_model = (model_folder + '/model_flow_state_dict_epoch' + str(epoch + 1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)
    finally:
        writer.flush()
        writer.close()
def main_run(dataset, model_state_dict, dataset_dir, seqLen, memSize, regression):
    """Evaluate a trained ``SelfSupAttentionModel`` on the classification task.

    Only the classification output of the model is scored; the motion-map
    input of each batch and the model's motion-map prediction are ignored.
    The accuracy and the label lists are printed. The row-normalised
    confusion matrix is saved as ``<dataset>-rgb.jpg`` and shown.

    Args:
        dataset: One of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'.
        model_state_dict: Path of the state dict to load.
        dataset_dir: Root directory of the test split.
        seqLen: Forwarded to the dataset.
        memSize: Forwarded to the model as ``mem_size``.
        regression: Forwarded to the model as ``REGRESSOR``.

    Raises:
        SystemExit: If ``dataset`` is unknown.
    """
    class_counts = {'gtea61': 61, 'gtea71': 71, 'gtea_gaze': 44, 'egtea': 106}
    if dataset not in class_counts:
        print('Dataset not found')
        sys.exit()
    num_classes = class_counts[dataset]

    DEVICE = "cuda"

    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir, spatial_transform=spatial_transform,
                               seqLen=seqLen, fmt='.png')
    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                                              shuffle=False, num_workers=2, pin_memory=True)

    model = SelfSupAttentionModel(num_classes=num_classes, mem_size=memSize, REGRESSOR=regression)
    model.load_state_dict(torch.load(model_state_dict))
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)
    model.to(DEVICE)

    test_samples = len(vid_seq_test)
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')

    numCorr = 0
    true_labels = []
    predicted_labels = []
    with torch.no_grad():
        # The motion maps and the motion-map prediction do not influence the
        # reported metrics, so they are dropped right away.
        for inputs, _, targets in test_loader:
            inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
            output_label, _, _ = model(inputVariable)
            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.to(DEVICE)).sum().item()
            # Plain Python ints: scikit-learn works on host data.
            true_labels.extend(targets.reshape(-1).tolist())
            predicted_labels.extend(predicted.cpu().reshape(-1).tolist())

    # numCorr is a Python int, so this is a true (floating point) division.
    test_accuracy = 100.0 * numCorr / test_samples
    print('Test Accuracy = {}%'.format(test_accuracy))

    # Debug output
    print(true_labels)
    print(predicted_labels)

    # Pin the label set so that matrix indices always line up with the ticks.
    cnf_matrix = confusion_matrix(true_labels, predicted_labels,
                                  labels=np.arange(num_classes)).astype(float)
    row_sums = cnf_matrix.sum(axis=1)[:, np.newaxis]
    row_sums[row_sums == 0] = 1.0  # classes without samples stay all-zero
    cnf_matrix_normalized = cnf_matrix / row_sums

    # One tick per class of the selected dataset (previously fixed to 61).
    ticks = np.arange(num_classes)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-rgb.jpg', bbox_inches='tight')
    plt.show()
def _flatten_mmap_pair(mmapPrediction, inputMmap, regression):
    """Flatten the predicted and target motion maps for the self-supervised loss.

    Returns a ``(prediction, target)`` pair: 1-D float tensors when
    ``regression == True`` (used with MSE), otherwise a ``(-1, 2)`` prediction
    and a 1-D long target obtained by rounding (used with cross-entropy).
    """
    flat_target = torch.reshape(inputMmap, (-1, ))
    if regression == True:  # comparison kept exactly as in the original code
        # Regression -> float number for the input motion maps
        return mmapPrediction.view(-1), flat_target.float()
    # Rounding makes the target map binary again before casting to class ids.
    return mmapPrediction.view(-1, 2), torch.round(flat_target).long()


def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen,
             trainBatchSize, valBatchSize, numEpochs, lr1, decayRate, weightDecay,
             stackSize, stepSize, memSize, alpha, regression, pretrainedRgbStage1,
             rgbStage1Dict, Flow):
    """Train the self-supervised attention model and log/save the results.

    Creates ``./<out_dir>/<dataset>/<variant>/stage<stage>`` (exits if it
    already exists), trains a ``SelfSupAttentionModel`` with a classification
    loss plus ``alpha`` times a motion-map loss, writes TensorBoard scalars and
    text logs there, and saves state dicts (best validation accuracy when a
    validation set is given, otherwise every 10 epochs).

    Args:
        dataset: One of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'. Any other
            value prints 'Dataset not found' and exits via ``sys.exit()``.
        stage: 1 trains only the LSTM cell and classifier; any other value
            also fine-tunes selected ``resNet.layer4`` convolutions and ``fc``.
        train_data_dir: Directory forwarded to the training dataset factory.
        val_data_dir: Validation directory, or None to skip validation.
        stage1_dict: Path of the stage-1 state dict (used when ``stage`` is
            not 1 and ``pretrainedRgbStage1`` is not True).
        out_dir: Root output directory.
        seqLen: Forwarded to the dataset factory as ``seqLen``.
        trainBatchSize: Batch size of the training loader.
        valBatchSize: Batch size of the validation loader.
        numEpochs: Number of training epochs.
        lr1: Learning rate for Adam.
        decayRate: ``gamma`` of the ``MultiStepLR`` scheduler.
        weightDecay: ``weight_decay`` for Adam.
        stackSize: Forwarded to the dataset factory as ``stackSize``.
        stepSize: ``milestones`` of the ``MultiStepLR`` scheduler.
        memSize: Forwarded to the models as ``mem_size``.
        alpha: Weight applied to the motion-map loss.
        regression: When ``== True`` the motion-map loss is MSE, otherwise
            cross-entropy; also forwarded to the model as ``REGRESSOR``.
        pretrainedRgbStage1: When ``== True`` (and ``stage`` is not 1) the
            classifier and LSTM cell are taken from an ``attentionModel``
            loaded from ``rgbStage1Dict``.
        rgbStage1Dict: Path of that RGB stage-1 state dict.
        Flow: When ``is True`` the two-stream dataset is used and incomplete
            last batches are dropped; also forwarded to the model as ``Flow``.
    """
    class_counts = {'gtea61': 61, 'gtea71': 71, 'gtea_gaze': 44, 'egtea': 106}
    if dataset not in class_counts:
        print('Dataset not found')
        sys.exit()
    num_classes = class_counts[dataset]

    # Setting Device
    DEVICE = "cuda"
    use_flow = Flow is True

    # Dir for saving models and log files
    variant = 'regSelfSup' if regression == True else 'selfSup'
    if use_flow:
        variant += '_flow'
    model_folder = os.path.join('./', out_dir, dataset, variant, 'stage' + str(stage))

    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    try:
        # Data loaders
        normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        spatial_transform = Compose([
            Scale(256),
            RandomHorizontalFlip(),
            MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
            ToTensor(),
            normalize
        ])

        # With flow, the two-stream dataset is used in self-supervised mode
        # and the last (possibly incomplete) batch is dropped from both loaders.
        make_dataset = makeDataset2Stream if use_flow else makeDataset
        dataset_kwargs = {'stackSize': stackSize, 'seqLen': seqLen, 'fmt': '.png'}
        if use_flow:
            dataset_kwargs['selfSup'] = True

        vid_seq_train = make_dataset(train_data_dir, spatial_transform=spatial_transform,
                                     **dataset_kwargs)
        train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                                                   shuffle=True, num_workers=4,
                                                   pin_memory=True, drop_last=use_flow)
        val_loader = None
        if val_data_dir is not None:
            val_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])
            vid_seq_val = make_dataset(val_data_dir, spatial_transform=val_transform,
                                       **dataset_kwargs)
            val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                                     shuffle=False, num_workers=2,
                                                     pin_memory=True, drop_last=use_flow)

        train_params = []
        finetuned_modules = []
        if stage == 1:
            model = SelfSupAttentionModel(num_classes=num_classes, mem_size=memSize,
                                          REGRESSOR=regression, Flow=Flow)
        else:
            if pretrainedRgbStage1 == True:
                # Pretrain from rgb with attention stage 1
                modelRgbStage1 = attentionModel(num_classes=num_classes, mem_size=memSize)
                modelRgbStage1.load_state_dict(torch.load(rgbStage1Dict))
                model = SelfSupAttentionModel(num_classes=num_classes, mem_size=memSize,
                                              REGRESSOR=regression, Flow=Flow)
                model.classifier = modelRgbStage1.classifier
                model.lstm_cell = modelRgbStage1.lstm_cell
            else:
                # Pretrain with stage1 from self supervised.
                # (The keyword was misspelled "FLow" here, unlike every other
                # construction of this model in the function.)
                model = SelfSupAttentionModel(num_classes=num_classes, mem_size=memSize,
                                              REGRESSOR=regression, Flow=Flow)
                model.load_state_dict(torch.load(stage1_dict))
            layer4 = model.resNet.layer4
            finetuned_modules = [
                layer4[0].conv1, layer4[0].conv2,
                layer4[1].conv1, layer4[1].conv2,
                layer4[2].conv1, layer4[2].conv2,
                model.resNet.fc,
            ]

        # Freeze everything, then re-enable only what is being trained.
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False

        for module in finetuned_modules:
            for params in module.parameters():
                params.requires_grad = True
                train_params.append(params)
        for module in finetuned_modules:
            module.train(True)

        for module in (model.lstm_cell, model.classifier):
            for params in module.parameters():
                params.requires_grad = True
                train_params.append(params)
        model.lstm_cell.train(True)
        model.classifier.train(True)

        model = model.to(DEVICE)

        loss_fn = nn.CrossEntropyLoss()
        # Loss of the motion segmentation self supervised task,
        # it is different whether there is regression or not
        if regression == True:
            lossMS = nn.MSELoss()
        else:
            lossMS = nn.CrossEntropyLoss()

        optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=weightDecay,
                                        eps=1e-4)
        optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn,
                                                               milestones=stepSize,
                                                               gamma=decayRate)

        min_accuracy = 0  # best validation accuracy seen so far

        for epoch in range(numEpochs):
            epoch_loss = 0
            mmap_loss = 0
            numCorrTrain = 0
            trainSamples = 0
            iterPerEpoch = 0

            # Validation switches the whole model to eval mode, so the
            # trainable parts are put back in train mode every epoch.
            model.lstm_cell.train(True)
            model.classifier.train(True)
            writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
            if stage == 2:
                for module in finetuned_modules:
                    module.train(True)

            for inputs, inputMmap, targets in train_loader:
                iterPerEpoch += 1
                optimizer_fn.zero_grad()

                if use_flow:
                    # The batch size used to be hard-coded to 32 here, which
                    # failed for any other trainBatchSize; take it from the
                    # batch itself instead.
                    inputMmap = torch.reshape(inputMmap, (inputs.size(0), -1, 1, 7, 7))
                inputMmap = inputMmap.to(DEVICE)

                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)
                trainSamples += inputs.size(0)

                output_label, _, mmapPrediction = model(inputVariable)
                mmapPrediction, inputMmap = _flatten_mmap_pair(mmapPrediction, inputMmap,
                                                               regression)

                # Weighting the loss of the selfSup task by multiplying it by alpha
                loss2 = alpha * lossMS(mmapPrediction, inputMmap)
                loss = loss_fn(output_label, labelVariable)
                total_loss = loss + loss2
                total_loss.backward()
                optimizer_fn.step()

                _, predicted = torch.max(output_label.data, 1)
                numCorrTrain += (predicted == labelVariable).sum()
                # TODO: see if loss.item() has to be multiplied by inputs.size(0)
                mmap_loss += loss2.item()
                epoch_loss += loss.item()

            # The scheduler's milestones are epochs, so step once per epoch.
            optim_scheduler.step()
            avg_loss = epoch_loss / iterPerEpoch
            avg_mmap_loss = mmap_loss / iterPerEpoch
            # true_divide avoids integer division of the correct-count tensor.
            trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100

            # TODO: check whether this print should change for the motion map
            print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
                epoch + 1, avg_loss, trainAccuracy))
            print('Mmap loss after {} epoch = {}% '.format(epoch + 1, avg_mmap_loss))

            writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
            writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
            writer.add_scalar('mmap_train_loss', avg_mmap_loss, epoch + 1)
            train_log_loss.write('Train Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_loss))
            train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, trainAccuracy))
            # Trailing newline added: without it this record ran into the
            # next epoch's "Train Loss" line in the log file.
            train_log_loss.write('Train mmap loss after {} epoch= {}\n'.format(
                epoch + 1, avg_mmap_loss))

            if val_data_dir is not None:
                # Validation runs after every epoch.
                model.train(False)
                val_loss_epoch = 0
                val_mmap_loss = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0

                with torch.no_grad():
                    for inputs, inputMmap, targets in val_loader:
                        val_iter += 1
                        val_samples += inputs.size(0)

                        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                        labelVariable = targets.to(DEVICE)

                        if use_flow:
                            # Same fix as in training (was hard-coded to 64).
                            inputMmap = torch.reshape(
                                inputMmap, (inputs.size(0), -1, 1, 7, 7))
                        inputMmap = inputMmap.to(DEVICE)

                        output_label, _, mmapPrediction = model(inputVariable)
                        mmapPrediction, inputMmap = _flatten_mmap_pair(
                            mmapPrediction, inputMmap, regression)

                        val_loss2 = alpha * lossMS(mmapPrediction, inputMmap)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.item()
                        val_mmap_loss += val_loss2.item()

                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += (predicted == labelVariable).sum()

                val_accuracy = torch.true_divide(numCorr, val_samples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                avg_mmap_val_loss = val_mmap_loss / val_iter

                print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                    epoch + 1, avg_val_loss, val_accuracy))
                # TODO: consider changing this print
                print('Val MMap Loss after {} epochs, loss = {}'.format(
                    epoch + 1, avg_mmap_val_loss))

                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                writer.add_scalar('val mmap/epoch_loss', avg_mmap_val_loss, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                    epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                    epoch + 1, val_accuracy))
                val_log_loss.write('Val MMap Loss after {} epochs = {}\n'.format(
                    epoch + 1, avg_mmap_val_loss))

                if val_accuracy > min_accuracy:
                    # TODO: see if we can build 2 different dicts,
                    # 1 for selfSup and 1 for regSelfSup
                    save_path_model = (model_folder + '/model_selfSup_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
            elif (epoch + 1) % 10 == 0:
                # No validation set: keep a checkpoint every 10 epochs. Note
                # the file name differs from the best-model one above.
                save_path_model = (model_folder + '/model_selfSup_state_dict_epoch' +
                                   str(epoch + 1) + '.pth')
                torch.save(model.state_dict(), save_path_model)
    finally:
        # Close the logs even when training is interrupted by an exception.
        train_log_loss.close()
        train_log_acc.close()
        val_log_acc.close()
        val_log_loss.close()
        writer.flush()
        writer.close()
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen,
             trainBatchSize, valBatchSize, numEpochs, lr1, decayRate, stepSize, memSize,
             outPool_size, evalInterval):
    """Train the LSTA RGB attention model and log/save the results.

    Creates ``./<out_dir>/<dataset>/rgb_lsta/stage<stage>`` (exits if it
    already exists), trains an ``attentionModelLSTA`` with cross-entropy,
    writes TensorBoard scalars and text logs there, and saves state dicts
    (best validation accuracy when a validation set is given, otherwise every
    10 epochs).

    Args:
        dataset: One of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'. Any other
            value prints 'Dataset not found' and exits via ``sys.exit()``.
        stage: 1 trains only the LSTA cell and classifier; 2 loads
            ``stage1_dict`` and also fine-tunes selected ``resNet.layer4``
            convolutions and ``fc``. Any other value exits via ``sys.exit()``.
        train_data_dir: Directory forwarded to ``makeDataset`` for training.
        val_data_dir: Validation directory, or None to skip validation.
        stage1_dict: Path of the stage-1 state dict (stage 2 only).
        out_dir: Root output directory.
        seqLen: Forwarded to ``makeDataset`` as ``seqLen``.
        trainBatchSize: Batch size of the training loader.
        valBatchSize: Batch size of the validation loader.
        numEpochs: Number of training epochs.
        lr1: Learning rate for Adam.
        decayRate: ``gamma`` of the ``MultiStepLR`` scheduler.
        stepSize: ``milestones`` of the ``MultiStepLR`` scheduler.
        memSize: Forwarded to the model as ``mem_size``.
        outPool_size: Forwarded to the model as ``c_cam_classes``.
        evalInterval: Validation runs when ``(epoch + 1) % evalInterval == 0``.
    """
    class_counts = {'gtea61': 61, 'gtea71': 71, 'gtea_gaze': 44, 'egtea': 106}
    if dataset not in class_counts:
        print('Dataset not found')
        sys.exit()
    num_classes = class_counts[dataset]

    # Reject unsupported stages before anything is created on disk; the model
    # is only built for stage 1 or 2, so any other value used to fail later
    # with a NameError after the output directory already existed.
    if stage not in (1, 2):
        print('Stage {} not supported'.format(stage))
        sys.exit()

    DEVICE = "cuda"
    c_cam_classes = outPool_size

    model_folder = os.path.join('./', out_dir, dataset, 'rgb_lsta', 'stage' + str(stage))
    if os.path.exists(model_folder):
        print('Directory {} exitst!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    try:
        # Data loader
        normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        spatial_transform = Compose([Scale(256), RandomHorizontalFlip(),
                                     MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                     ToTensor(), normalize])

        vid_seq_train = makeDataset(train_data_dir, spatial_transform=spatial_transform,
                                    fmt='.png', seqLen=seqLen)
        print('Number of train samples = {}'.format(vid_seq_train.__len__()))
        train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                                                   shuffle=True, num_workers=4,
                                                   pin_memory=True)
        val_loader = None
        if val_data_dir is not None:
            val_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])
            vid_seq_val = makeDataset(val_data_dir, spatial_transform=val_transform,
                                      fmt='.png', seqLen=seqLen)
            print('Number of test samples = {}'.format(vid_seq_val.__len__()))
            val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                                     shuffle=False, num_workers=2,
                                                     pin_memory=True)

        model = attentionModelLSTA(num_classes=num_classes, mem_size=memSize,
                                   c_cam_classes=c_cam_classes)
        if stage == 2:
            model.load_state_dict(torch.load(stage1_dict))

        # Freeze everything, then re-enable only what is being trained.
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False

        trainable_modules = []
        if stage == 2:
            layer4 = model.resNet.layer4
            trainable_modules += [
                layer4[0].conv1, layer4[0].conv2,
                layer4[1].conv1, layer4[1].conv2,
                layer4[2].conv1, layer4[2].conv2,
                model.resNet.fc,
            ]
        trainable_modules += [model.lsta_cell, model.classifier]

        train_params = []
        for module in trainable_modules:
            for params in module.parameters():
                params.requires_grad = True
                train_params.append(params)

        # Only the classifier is switched to train mode, as in the original.
        model.classifier.train(True)
        model = model.to(DEVICE)

        loss_fn = nn.CrossEntropyLoss()
        optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=5e-4, eps=1e-4)
        optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn,
                                                               milestones=stepSize,
                                                               gamma=decayRate)

        min_accuracy = 0  # best validation accuracy seen so far

        for epoch in range(numEpochs):
            epoch_loss = 0
            numCorrTrain = 0
            trainSamples = 0
            iterPerEpoch = 0

            # Validation switches the whole model to eval mode, so the
            # classifier is put back in train mode every epoch.
            model.classifier.train(True)
            writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)

            for inputs, targets in train_loader:
                iterPerEpoch += 1
                optimizer_fn.zero_grad()

                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)
                trainSamples += inputs.size(0)

                output_label, _ = model(inputVariable)
                loss = loss_fn(output_label, labelVariable)
                loss.backward()
                optimizer_fn.step()

                _, predicted = torch.max(output_label.data, 1)
                numCorrTrain += (predicted == labelVariable).sum()
                epoch_loss += loss.item()

            avg_loss = epoch_loss / iterPerEpoch
            # true_divide avoids integer division of the correct-count tensor.
            trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100
            optim_scheduler.step()

            print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
                epoch + 1, avg_loss, trainAccuracy))
            writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
            writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
            train_log_loss.write('train Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_loss))
            train_log_acc.write('train Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, trainAccuracy))

            if val_data_dir is not None:
                if (epoch + 1) % evalInterval == 0:
                    model.train(False)
                    val_loss_epoch = 0
                    val_iter = 0
                    val_samples = 0
                    numCorr = 0

                    with torch.no_grad():
                        for inputs, targets in val_loader:
                            val_iter += 1
                            val_samples += inputs.size(0)

                            inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                            labelVariable = targets.to(DEVICE)

                            output_label, _ = model(inputVariable)
                            val_loss = loss_fn(output_label, labelVariable)
                            val_loss_epoch += val_loss.item()

                            _, predicted = torch.max(output_label.data, 1)
                            numCorr += (predicted == labelVariable).sum()

                    val_accuracy = torch.true_divide(numCorr, val_samples) * 100
                    avg_val_loss = val_loss_epoch / val_iter

                    print('Val: Epoch = {} | Loss = {} | Accuracy = {}'.format(
                        epoch + 1, avg_val_loss, val_accuracy))
                    writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                    writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                    val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                        epoch + 1, avg_val_loss))
                    val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                        epoch + 1, val_accuracy))

                    if val_accuracy > min_accuracy:
                        # Keep only the best-scoring model on the validation set.
                        min_accuracy = val_accuracy
                        save_path_model = (model_folder + '/model_rgb_lsta_state_dict.pth')
                        torch.save(model.state_dict(), save_path_model)
            elif (epoch + 1) % 10 == 0:
                # No validation set: keep a checkpoint every 10 epochs.
                save_path_model = (model_folder + '/model_rgb_lsta_state_dict_epoch' +
                                   str(epoch + 1) + '.pth')
                torch.save(model.state_dict(), save_path_model)
    finally:
        # Close the logs even when training is interrupted by an exception.
        train_log_loss.close()
        train_log_acc.close()
        val_log_acc.close()
        val_log_loss.close()
        writer.flush()
        writer.close()