# Split the predicted parameters into zoom and rotation components
theta = theta.view(-1, 1, 2)
zoom = theta.narrow(2, 0, 1)
rotation = theta.narrow(2, 1, 1)

# We restrict the trafo to zoom and rotation (no translation)
N_thetas = list(theta.shape)[0]
identity_tensor = Variable(torch.tensor([[[1.0, 0.0, 0.0],
                                          [0.0, 1.0, 0.0]]]).repeat(
                                              (N_thetas, 1, 1)),
                           requires_grad=False)
rotation_tensor = Variable(torch.tensor([[[0.0, -1.0, 0.0],
                                          [1.0, 0.0, 0.0]]]).repeat(
                                              (N_thetas, 1, 1)),
                           requires_grad=False)
if use_gpu:
    identity_tensor = identity_tensor.cuda()
    rotation_tensor = rotation_tensor.cuda()

# Assemble the 2x3 affine matrices: zoom * identity + rotation * generator
theta = zoom * identity_tensor + rotation * rotation_tensor

logger.save("logger")
save_stn_data(model, "stn_data")
save_stn_cnn(model, "stn_cnn")

logger.load("logger")
load_stn_data(model, "stn_data")
load_stn_cnn(model, "stn_cnn")
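# For context, a minimal sketch of how the assembled 2x3 theta is typically
# consumed in an STN forward pass via F.affine_grid / F.grid_sample. The
# function name apply_theta and the images_batch argument are illustrative
# assumptions, not the repo's actual forward code:
import torch.nn.functional as F

def apply_theta(images_batch, theta):
    # theta: (N, 2, 3) affine matrices; images_batch: (N, C, H, W)
    grid = F.affine_grid(theta, images_batch.size())
    return F.grid_sample(images_batch, grid)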
    train(model,
          dataloader_train,
          n_epochs=10,
          checkpoint_name="training",
          use_gpu=use_gpu,
          stn=True,
          dataloader_test=dataloader_test,
          logger=logger)

    # Evaluate the final accuracies
    print("Train accuracy: " + str(evaluate(model, dataloader_train)))
    print("Test accuracy: " + str(evaluate(model, dataloader_test)))

    # Save the entire model in /Saved
    save_stn_data(model, "./Classifier/Saved/stn_data_final")
    save_stn_cnn(model, "./Classifier/Saved/stn_cnn_final")
    logger.save("./Classifier/Saved/logger_final")

    print("Thank you for training with Deutsche Bahn.")
else:
    """
    If the model is trained, we use the pretrained model to create all the
    relevant plots for the poster/report.
    """
    # Load the train and test set to calculate the accuracies
    trainset = dataset(filepath_train, split="train")
    testset = dataset(filepath_test, split="test")

    # Print the size of the train/test set
    print("Size of Trainset: " + str(len(trainset)))
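# evaluate() is defined elsewhere in the repo; a plausible minimal version,
# assuming batches are dicts with 'tensor' and 'id' keys (as in train()) and
# that the model returns (predictions, thetas). The name evaluate_sketch is
# ours, not the repo's:
import torch

def evaluate_sketch(model, dataloader, use_gpu=True):
    correct, total = 0, 0
    with torch.no_grad():
        for batch in dataloader:
            images, ids = batch['tensor'], batch['id']
            if use_gpu:
                images, ids = images.cuda(), ids.cuda()
            predictions, _ = model(images)
            correct += int(torch.sum(torch.argmax(predictions, dim=-1) == ids))
            total += ids.shape[0]
    return correct / total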
    model.databatch = next(iter(dataloader_train))["tensor"].cuda()
    train(model,
          dataloader_train,
          n_epochs=10,
          checkpoint_name="test",
          use_gpu=use_gpu,
          stn=True,
          dataloader_test=dataloader_test,
          logger=logger)
    print("Train accuracy: " + str(evaluate(model, dataloader_train)))
    print("Test accuracy: " + str(evaluate(model, dataloader_test)))
    save_stn_data(model, "./Saved/stn_data_test")
    save_stn_cnn(model, "./Saved/stn_cnn_test")
    logger.save("./Saved/logger_test")
else:
    trainset = dataset(filepath_train, split="train")
    testset = dataset(filepath_test, split="test")
    print("Trainset: " + str(len(trainset)))
    print("Testset: " + str(len(testset)))

dataloader_train = DataLoader(trainset, batch_size=32, shuffle=True)
dataloader_test = DataLoader(testset, batch_size=32, shuffle=True)

use_gpu = torch.cuda.is_available()
tqdm.write("CUDA is available: " + str(use_gpu))
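# The dataset class is repo-specific; a minimal sketch of the interface the
# DataLoaders above rely on (samples as dicts with 'tensor' and 'id' keys).
# The file layout and loading details are assumptions:
from torch.utils.data import Dataset

class DatasetSketch(Dataset):
    def __init__(self, filepath, split="train"):
        self.samples = []  # would be filled with (image_tensor, class_id) pairs from filepath/split

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        image_tensor, class_id = self.samples[index]
        return {'tensor': image_tensor, 'id': class_id}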
    train(model,
          dataloader_train,
          n_epochs=10,
          checkpoint_name="training",
          use_gpu=use_gpu,
          stn=True,
          dataloader_test=dataloader_test,
          logger=logger)

    # Evaluate the final accuracies
    print("Train accuracy: " + str(evaluate(model, dataloader_train)))
    print("Test accuracy: " + str(evaluate(model, dataloader_test)))

    # Save the entire model in /Saved
    save_stn_data(model, "./Saved/stn_data_final")
    save_stn_cnn(model, "./Saved/stn_cnn_final")
    logger.save("./Saved/logger_final")

    print("Thank you for training with Deutsche Bahn.")
else:
    """
    If the model is trained, we use the pretrained model to create all the
    relevant plots for the poster/report.
    """
    # Load the train and test set to calculate the accuracies
    #trainset = dataset(filepath_train, split="train")
    #testset = dataset(filepath_test, split="test")

    # Print the size of the train/test set
    #print("Size of Trainset: " + str(len(trainset)))
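# save_stn_cnn / load_stn_cnn are repo helpers whose bodies are not shown
# here; a common way to implement such checkpoint wrappers (an assumption,
# not the repo's exact code; the _sketch names are ours):
import torch

def save_stn_cnn_sketch(model, path):
    torch.save(model.state_dict(), path + ".ckpt")

def load_stn_cnn_sketch(model, path):
    model.load_state_dict(torch.load(path + ".ckpt"))
    return model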
import os

import numpy as np
import torch
from torch.autograd import Variable
from torch.nn import CrossEntropyLoss, SmoothL1Loss
from tqdm import tqdm

# visualize_stn, visualize_scalar and save_stn_cnn are repo-local helpers


def train(model,
          dataloader,
          n_epochs=10,
          checkpoint_name='training',
          use_gpu=True,
          stn=True,
          dataloader_test=None,
          logger=None):
    '''
    This function trains the CNN (+STN) model. The training is done on the
    dataset in the dataloader instance for multiple epochs. After every
    epoch the model is saved.

    Arguments:
        model           - CNN instance
        dataloader      - a DataLoader instance based on a dataset or *** instance
        n_epochs        - number of epochs to be trained
        checkpoint_name - name used for the saved checkpoints
        use_gpu         - boolean stating whether CUDA shall be used (check first!)
        stn             - (boolean) True if model is a CNN_STN instance
        dataloader_test - optional DataLoader used to track the test accuracy
        logger          - optional logger in which all scalar values are stored
    '''
    filepath_this_file = os.path.dirname(os.path.abspath(__file__))

    if use_gpu:
        model.cuda()

    """
    We use CrossEntropyLoss for the classification task. To push the STN
    transformation close towards the identity we use the SmoothL1Loss for
    determining the distance of the trafo to the identity. The optimizer we
    use is Adam with weight_decay to push the weights towards 0 and reduce
    the number of unnecessary parameters (analogous to a penalty term in the
    loss function).
    """
    # Set up the optimizer (Adam) and the loss functions
    Loss = CrossEntropyLoss()
    Distance = SmoothL1Loss(size_average=False)  # summed; reduction='sum' in newer PyTorch
    Optimizer = torch.optim.Adam(model.parameters(), weight_decay=0.1)

    # Set up lists for plotting
    loss_list = []
    batch_acc_list = []

    # We iterate over all epochs
    for epoch in tqdm(range(n_epochs), desc='epoch', position=1):

        # We loop through the set of batches
        for batch_index, batch in enumerate(
                tqdm(dataloader, desc='batch', position=0)):
            train_step = batch_index + len(dataloader) * epoch

            # We have tried adaptive step sizes, which did not work properly
            #if epoch == 6:
            #    lr = 0.01
            #    Optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.5, nesterov=True, weight_decay=0.01)
            #elif epoch == 9:
            #    lr = 0.001
            #    Optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.05, nesterov=True, weight_decay=0.01)
            #elif epoch == 40:
            #    lr = 0.0005
            #    Optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.005, nesterov=True, weight_decay=0.01)

            # Unpack the batch
            images_batch, ids_batch = batch['tensor'], batch['id']

            # Wrap in Variables
            images_batch = Variable(images_batch)
            ids_batch = Variable(ids_batch)
            if use_gpu:
                images_batch = images_batch.cuda()
                ids_batch = ids_batch.cuda()

            ####### Forward #######
            # We pretrain the CNN classifier without the STN for 6 epochs
            if epoch < 6:
                predictions, thetas = model(images_batch, skip_stn=True)

                # Loss without regularizer term
                loss2 = Loss(predictions, ids_batch)
                loss = loss2
            else:
                predictions, thetas = model(images_batch, skip_stn=False)

                # Build identity tensor for the L1 distance
                #N_thetas = [*thetas.size()][0]  # Python 3 code
                N_thetas = list(thetas.shape)[0]
                identity_tensor = torch.tensor([[1.0, 0.0, 0.0],
                                                [0.0, 1.0, 0.0]]).repeat(
                                                    (N_thetas, 1, 1))
                if use_gpu:
                    identity_tensor = identity_tensor.cuda()

                # Construct the losses
                loss1 = Distance(thetas, identity_tensor)
                loss2 = Loss(predictions, ids_batch)

                # We push the transformation close to the identity by using
                # a strong regularizer for one epoch
                if epoch == 6:
                    loss = loss2 + loss1
                # After that we use the classification loss (which is a convex
                # function) to optimize all parameters (including the STN)
                else:
                    loss = loss2 + 0.001 * loss1

            # Calculate the batch accuracy
            acc = torch.mean(
                torch.eq(torch.argmax(predictions, dim=-1),
                         ids_batch).float())

            # Zero the gradients before backward propagation
            Optimizer.zero_grad()

            # Backward propagation
            loss.backward()

            # UNTESTED: gradient clipping
            #torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

            # Update step
            Optimizer.step()

            # Store batch loss and accuracy as floats so the graph is freed
            loss_list.append([train_step, float(loss)])
            batch_acc_list.append([train_step, float(acc)])

            # Write the current batch accuracy and show the current trafo
            if train_step % 500 == 0:
                tqdm.write('{}: Batch-Accuracy = {}, Loss = {}, Epoch = {}'\
                    .format(train_step, float(acc), float(loss), epoch))
                if stn:
                    visualize_stn(model)

            # Evaluation set-up: save the STN (grid) at the beginning of the
            # most interesting epochs
            if stn and (epoch == 6 or epoch == 7) and batch_index < 50:
                model.save_stn(epoch=epoch, batch=batch_index)

        # Evaluation set-up: save the STN (grid) after every epoch
        if stn:
            model.save_stn(epoch=epoch, batch=0)

        # Save the train and test set accuracy after every epoch
        model.save_acc(dataloader, epoch, split="train")
        if dataloader_test is not None:
            model.save_acc(dataloader_test, epoch, split="test")

        ###### Update the plots of loss, batch accuracy, test and train accuracy ######
        if epoch > 0:
            visualize_scalar(loss_list,
                             filename=str(filepath_this_file) + "/Plots/loss.pdf",
                             title="Loss of total network",
                             xname="batch",
                             yname="loss",
                             show=False,
                             scalars=1,
                             labels=None,
                             ylim=(0, 6))
            visualize_scalar(batch_acc_list,
                             filename=str(filepath_this_file) + "/Plots/batch_acc.pdf",
                             title="Accuracy of batch",
                             xname="batch",
                             yname="accuracy",
                             show=False,
                             scalars=1,
                             labels=None)
            if dataloader_test is None:
                visualize_scalar(model.list_train_acc,
                                 filename=str(filepath_this_file) + "/Plots/train_acc.pdf",
                                 title="Accuracy of Trainset",
                                 xname="epoch",
                                 yname="accuracy",
                                 show=False,
                                 scalars=1,
                                 labels=None)
            else:
                # Build one data array holding the train and test accuracies
                train_acc = np.array(model.list_train_acc)
                test_acc = np.array(model.list_test_acc)
                xlen = train_acc.shape[0]
                ylen = 4
                data = np.zeros((xlen, ylen))
                data[:, 0:2] = train_acc
                data[:, 2:4] = test_acc
                labels = ["Trainset", "Testset"]
                visualize_scalar(data,
                                 filename=str(filepath_this_file) + "/Plots/train_test_acc.pdf",
                                 title="Accuracy of Datasets",
                                 xname="epoch",
                                 yname="accuracy",
                                 show=False,
                                 scalars=2,
                                 labels=labels)

        # Save the model after every epoch (skipping the first)
        if epoch > 0:
            save_stn_cnn(
                model,
                str(filepath_this_file) +
                '/Temp/{}-{}'.format(checkpoint_name, epoch))
            #torch.save(model.state_dict(), '{}-{}.ckpt'.format(checkpoint_name, epoch))

    # Save all scalar values to the logger for later use; test accuracies
    # only exist if a test loader was passed
    if logger is not None:
        logger.loss_list = loss_list
        logger.batch_acc_list = batch_acc_list
        logger.train_acc_list = model.list_train_acc
        if dataloader_test is not None:
            logger.test_acc_list = model.list_test_acc

    return None
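# To make the regularizer concrete: loss1 above is the summed SmoothL1
# distance between the predicted thetas and the identity transform. A
# self-contained numerical check (the values are illustrative only):
import torch
from torch.nn import SmoothL1Loss

thetas = torch.tensor([[[1.1, 0.0, 0.0],
                        [0.0, 0.9, 0.0]]])    # one slightly zoomed trafo
identity = torch.tensor([[[1.0, 0.0, 0.0],
                          [0.0, 1.0, 0.0]]])
distance = SmoothL1Loss(reduction='sum')      # same as size_average=False
print(distance(thetas, identity))             # small, and 0 exactly at the identity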